Convert open/fetch to stream (#2028)

* Types lined up for open with stream
* Chunking stream
* Maybe I didn't need most of the Stream stuff after all?
* Some clean-up
* Merge weird cargo.lock
* Start moving some encoding logic to MaybeTextCodec
  Will we lose the nice table formatting if we Stream? How do we get it back? Collect the Stream at the end?
* Clean-up and small refinements
* Put in auto-convert workaround
* Workaround to make sure bat functionality works
* Handle some easy error cases
* All tests pass
* Remove guessing logic
* Address clippy comments
* Pull latest master and fix MaybeTextCodec usage
* Add tag to enable autoview
2024-11-15 01:17:07 +00:00 · 2020-07-03 12:53:20 -07:00 · 2020-07-03 12:53:20 -07:00 · e31e8d1550
commit e31e8d1550
parent 8775991c2d
10 changed files with 609 additions and 407 deletions
--- a/crates/nu-cli/src/cli.rs
+++ b/crates/nu-cli/src/cli.rs
@ -1,5 +1,5 @@
 use crate::commands::classified::block::run_block;
-use crate::commands::classified::external::{MaybeTextCodec, StringOrBinary};
+use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
 use crate::commands::plugin::JsonRpc;
 use crate::commands::plugin::{PluginCommand, PluginSink};
 use crate::commands::whole_stream_command;
@ -953,7 +953,7 @@ pub async fn process_line(

            let input_stream = if redirect_stdin {
                let file = futures::io::AllowStdIo::new(std::io::stdin());
-                let stream = FramedRead::new(file, MaybeTextCodec).map(|line| {
+                let stream = FramedRead::new(file, MaybeTextCodec::default()).map(|line| {
                    if let Ok(line) = line {
                        match line {
                            StringOrBinary::String(s) => Ok(Value {
--- a/crates/nu-cli/src/commands.rs
+++ b/crates/nu-cli/src/commands.rs
@ -20,6 +20,7 @@ pub(crate) mod clip;
 pub(crate) mod command;
 pub(crate) mod compact;
 pub(crate) mod config;
+pub(crate) mod constants;
 pub(crate) mod count;
 pub(crate) mod cp;
 pub(crate) mod date;
--- a/crates/nu-cli/src/commands/classified/external.rs
+++ b/crates/nu-cli/src/commands/classified/external.rs
@ -1,3 +1,4 @@
+use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
 use crate::evaluate::evaluate_baseline_expr;
 use crate::futures::ThreadedReceiver;
 use crate::prelude::*;
@ -7,9 +8,7 @@ use std::ops::Deref;
 use std::process::{Command, Stdio};
 use std::sync::mpsc;

-use bytes::{BufMut, Bytes, BytesMut};
 use futures::executor::block_on_stream;
-// use futures::stream::StreamExt;
 use futures_codec::FramedRead;
 use log::trace;

@ -18,70 +17,6 @@ use nu_protocol::hir::ExternalCommand;
 use nu_protocol::{Primitive, Scope, ShellTypeName, UntaggedValue, Value};
 use nu_source::Tag;

-pub enum StringOrBinary {
-    String(String),
-    Binary(Vec<u8>),
-}
-pub struct MaybeTextCodec;
-
-impl futures_codec::Encoder for MaybeTextCodec {
-    type Item = StringOrBinary;
-    type Error = std::io::Error;
-
-    fn encode(&mut self, item: Self::Item, dst: &mut BytesMut) -> Result<(), Self::Error> {
-        match item {
-            StringOrBinary::String(s) => {
-                dst.reserve(s.len());
-                dst.put(s.as_bytes());
-                Ok(())
-            }
-            StringOrBinary::Binary(b) => {
-                dst.reserve(b.len());
-                dst.put(Bytes::from(b));
-                Ok(())
-            }
-        }
-    }
-}
-
-impl futures_codec::Decoder for MaybeTextCodec {
-    type Item = StringOrBinary;
-    type Error = std::io::Error;
-
-    fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
-        let v: Vec<u8> = src.to_vec();
-        match String::from_utf8(v) {
-            Ok(s) => {
-                src.clear();
-                if s.is_empty() {
-                    Ok(None)
-                } else {
-                    Ok(Some(StringOrBinary::String(s)))
-                }
-            }
-            Err(err) => {
-                // Note: the longest UTF-8 character per Unicode spec is currently 6 bytes. If we fail somewhere earlier than the last 6 bytes,
-                // we know that we're failing to understand the string encoding and not just seeing a partial character. When this happens, let's
-                // fall back to assuming it's a binary buffer.
-                if src.is_empty() {
-                    Ok(None)
-                } else if src.len() > 6 && (src.len() - err.utf8_error().valid_up_to() > 6) {
-                    // Fall back to assuming binary
-                    let buf = src.to_vec();
-                    src.clear();
-                    Ok(Some(StringOrBinary::Binary(buf)))
-                } else {
-                    // Looks like a utf-8 string, so let's assume that
-                    let buf = src.split_to(err.utf8_error().valid_up_to() + 1);
-                    String::from_utf8(buf.to_vec())
-                        .map(|x| Some(StringOrBinary::String(x)))
-                        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
-                }
-            }
-        }
-    }
-}
-
 pub(crate) async fn run_external_command(
    command: ExternalCommand,
    context: &mut Context,
@ -319,7 +254,7 @@ fn spawn(
                };

                let file = futures::io::AllowStdIo::new(stdout);
-                let stream = FramedRead::new(file, MaybeTextCodec);
+                let stream = FramedRead::new(file, MaybeTextCodec::default());

                for line in block_on_stream(stream) {
                    match line {
@ -373,7 +308,7 @@ fn spawn(
                }

                let file = futures::io::AllowStdIo::new(stderr);
-                let err_stream = FramedRead::new(file, MaybeTextCodec);
+                let err_stream = FramedRead::new(file, MaybeTextCodec::default());

                for err_line in block_on_stream(err_stream) {
                    match err_line {
--- a/crates/nu-cli/src/commands/classified/maybe_text_codec.rs
+++ b/crates/nu-cli/src/commands/classified/maybe_text_codec.rs
@ -0,0 +1,103 @@
+use bytes::{BufMut, Bytes, BytesMut};
+
+use nu_errors::ShellError;
+
+extern crate encoding_rs;
+use encoding_rs::{CoderResult, Decoder, Encoding, UTF_8};
+
+const OUTPUT_BUFFER_SIZE: usize = 8192;
+
+/// One decoded chunk of input: either text or raw bytes.
+///
+/// This is the item type that `MaybeTextCodec` both encodes and decodes.
+pub enum StringOrBinary {
+    /// Text produced by decoding the chunk.
+    String(String),
+    /// The raw, undecoded bytes of the chunk.
+    Binary(Vec<u8>),
+}
+
+/// Codec that turns a byte stream into `StringOrBinary` chunks (and back).
+///
+/// Wraps an `encoding_rs` `Decoder` that is reused for every chunk passed to `decode`.
+pub struct MaybeTextCodec {
+    /// Decoder used to convert incoming bytes to text.
+    decoder: Decoder,
+}
+
+impl MaybeTextCodec {
+    /// Builds a codec for the requested encoding.
+    ///
+    /// With `Some(encoding)` the decoder comes from `new_decoder_with_bom_removal`.
+    /// With `None` a UTF-8 decoder is created through `new_decoder`, because the absence
+    /// of an explicit encoding means we want BOM sniffing enabled.
+    pub fn new(encoding: Option<&'static Encoding>) -> Self {
+        let decoder = encoding.map_or_else(
+            || UTF_8.new_decoder(),
+            |chosen| chosen.new_decoder_with_bom_removal(),
+        );
+        Self { decoder }
+    }
+}
+
+impl Default for MaybeTextCodec {
+    /// Returns a codec backed by a UTF-8 decoder, exactly what `MaybeTextCodec::new(None)` builds.
+    fn default() -> Self {
+        Self::new(None)
+    }
+}
+
+impl futures_codec::Encoder for MaybeTextCodec {
+    type Item = StringOrBinary;
+    type Error = std::io::Error;
+
+    /// Appends the bytes of `item` to `dst`; text is written as its UTF-8 bytes.
+    ///
+    /// Always returns `Ok(())`.
+    fn encode(&mut self, item: Self::Item, dst: &mut BytesMut) -> Result<(), Self::Error> {
+        // Both variants end up as a plain byte payload, so normalise first and write once.
+        let payload = match item {
+            StringOrBinary::String(text) => Bytes::from(text),
+            StringOrBinary::Binary(raw) => Bytes::from(raw),
+        };
+        dst.reserve(payload.len());
+        dst.put(payload);
+        Ok(())
+    }
+}
+
+// TODO: Write some tests
+impl futures_codec::Decoder for MaybeTextCodec {
+    type Item = StringOrBinary;
+    type Error = ShellError;
+
+    /// Decodes everything currently buffered in `src` into a single `StringOrBinary` item.
+    ///
+    /// Returns `Ok(None)` when `src` is empty. Otherwise the whole buffer is consumed and
+    /// returned as text, or as the raw bytes when the first decoding pass had to substitute
+    /// replacement characters.
+    fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
+        if src.is_empty() {
+            return Ok(None);
+        }
+
+        let mut s = String::with_capacity(OUTPUT_BUFFER_SIZE);
+
+        let (res, read, replacements) = self.decoder.decode_to_string(src, &mut s, false);
+        // If we had to make replacements when converting to utf8, fallback to binary
+        if replacements {
+            let raw = src.to_vec();
+            // Drain the buffer just like the text paths do. The framed reader calls `decode`
+            // again on whatever is left in `src`, so leaving the bytes in place would yield
+            // the same binary chunk on every call.
+            src.clear();
+            return Ok(Some(StringOrBinary::Binary(raw)));
+        }
+
+        match res {
+            CoderResult::InputEmpty => {
+                src.clear();
+                Ok(Some(StringOrBinary::String(s)))
+            }
+            CoderResult::OutputFull => {
+                // If the original buffer size is too small,
+                // We continue to allocate new Strings and append them to the result until the input buffer is smaller than the allocated String
+                // NOTE(review): replacements made in these follow-up passes are ignored, so no
+                // binary fallback happens here -- confirm this is intended.
+                let mut starting_index = read;
+                loop {
+                    let mut more = String::with_capacity(OUTPUT_BUFFER_SIZE);
+                    let (res, read, _replacements) =
+                        self.decoder
+                            .decode_to_string(&src[starting_index..], &mut more, false);
+                    s.push_str(&more);
+                    // Our input buffer is smaller than our allocated String, we can stop now
+                    if let CoderResult::InputEmpty = res {
+                        break;
+                    }
+                    starting_index += read;
+                }
+                src.clear();
+                Ok(Some(StringOrBinary::String(s)))
+            }
+        }
+    }
+}
--- a/crates/nu-cli/src/commands/classified/mod.rs
+++ b/crates/nu-cli/src/commands/classified/mod.rs
@ -3,6 +3,7 @@ mod dynamic;
 pub(crate) mod expr;
 pub(crate) mod external;
 pub(crate) mod internal;
+pub(crate) mod maybe_text_codec;

 #[allow(unused_imports)]
 pub(crate) use dynamic::Command as DynamicCommand;
--- a/crates/nu-cli/src/commands/constants.rs
+++ b/crates/nu-cli/src/commands/constants.rs
@ -0,0 +1,358 @@
+/// Names that `open` compares a file's extension against to decide whether bat can
+/// syntax-highlight it; on a match the file is collected instead of streamed.
+///
+/// NOTE(review): several entries repeat (e.g. "h", "fs", "js"). That looks harmless for a
+/// `contains` lookup -- confirm before deduplicating.
+pub const BAT_LANGUAGES: &[&str] = &[
+    "as",
+    "csv",
+    "tsv",
+    "applescript",
+    "script editor",
+    "s",
+    "S",
+    "adoc",
+    "asciidoc",
+    "asc",
+    "asa",
+    "yasm",
+    "nasm",
+    "asm",
+    "inc",
+    "mac",
+    "awk",
+    "bat",
+    "cmd",
+    "bib",
+    "sh",
+    "bash",
+    "zsh",
+    ".bash_aliases",
+    ".bash_completions",
+    ".bash_functions",
+    ".bash_login",
+    ".bash_logout",
+    ".bash_profile",
+    ".bash_variables",
+    ".bashrc",
+    ".profile",
+    ".textmate_init",
+    ".zshrc",
+    "PKGBUILD",
+    ".ebuild",
+    ".eclass",
+    "c",
+    "h",
+    "cs",
+    "csx",
+    "cpp",
+    "cc",
+    "cp",
+    "cxx",
+    "c++",
+    "C",
+    "h",
+    "hh",
+    "hpp",
+    "hxx",
+    "h++",
+    "inl",
+    "ipp",
+    "cabal",
+    "clj",
+    "cljc",
+    "cljs",
+    "edn",
+    "CMakeLists.txt",
+    "cmake",
+    "h.in",
+    "hh.in",
+    "hpp.in",
+    "hxx.in",
+    "h++.in",
+    "CMakeCache.txt",
+    "cr",
+    "css",
+    "css.erb",
+    "css.liquid",
+    "d",
+    "di",
+    "dart",
+    "diff",
+    "patch",
+    "Dockerfile",
+    "dockerfile",
+    "ex",
+    "exs",
+    "elm",
+    "erl",
+    "hrl",
+    "Emakefile",
+    "emakefile",
+    "fs",
+    "fsi",
+    "fsx",
+    "fs",
+    "fsi",
+    "fsx",
+    "fish",
+    "attributes",
+    "gitattributes",
+    ".gitattributes",
+    "COMMIT_EDITMSG",
+    "MERGE_MSG",
+    "TAG_EDITMSG",
+    "gitconfig",
+    ".gitconfig",
+    ".gitmodules",
+    "exclude",
+    "gitignore",
+    ".gitignore",
+    ".git",
+    "gitlog",
+    "git-rebase-todo",
+    "go",
+    "dot",
+    "DOT",
+    "gv",
+    "groovy",
+    "gvy",
+    "gradle",
+    "Jenkinsfile",
+    "hs",
+    "hs",
+    "hsc",
+    "show-nonprintable",
+    "html",
+    "htm",
+    "shtml",
+    "xhtml",
+    "asp",
+    "html.eex",
+    "yaws",
+    "rails",
+    "rhtml",
+    "erb",
+    "html.erb",
+    "adp",
+    "twig",
+    "html.twig",
+    "ini",
+    "INI",
+    "INF",
+    "reg",
+    "REG",
+    "lng",
+    "cfg",
+    "CFG",
+    "desktop",
+    "url",
+    "URL",
+    ".editorconfig",
+    ".hgrc",
+    "hgrc",
+    "java",
+    "bsh",
+    "properties",
+    "jsp",
+    "js",
+    "htc",
+    "js",
+    "jsx",
+    "babel",
+    "es6",
+    "js.erb",
+    "json",
+    "sublime-settings",
+    "sublime-menu",
+    "sublime-keymap",
+    "sublime-mousemap",
+    "sublime-theme",
+    "sublime-build",
+    "sublime-project",
+    "sublime-completions",
+    "sublime-commands",
+    "sublime-macro",
+    "sublime-color-scheme",
+    "ipynb",
+    "Pipfile.lock",
+    "jsonnet",
+    "libsonnet",
+    "libjsonnet",
+    "jl",
+    "kt",
+    "kts",
+    "tex",
+    "ltx",
+    "less",
+    "css.less",
+    "lisp",
+    "cl",
+    "clisp",
+    "l",
+    "mud",
+    "el",
+    "scm",
+    "ss",
+    "lsp",
+    "fasl",
+    "lhs",
+    "lua",
+    "make",
+    "GNUmakefile",
+    "makefile",
+    "Makefile",
+    "makefile.am",
+    "Makefile.am",
+    "makefile.in",
+    "Makefile.in",
+    "OCamlMakefile",
+    "mak",
+    "mk",
+    "md",
+    "mdown",
+    "markdown",
+    "markdn",
+    "matlab",
+    "build",
+    "nix",
+    "m",
+    "h",
+    "mm",
+    "M",
+    "h",
+    "ml",
+    "mli",
+    "mll",
+    "mly",
+    "pas",
+    "p",
+    "dpr",
+    "pl",
+    "pm",
+    "pod",
+    "t",
+    "PL",
+    "php",
+    "php3",
+    "php4",
+    "php5",
+    "php7",
+    "phps",
+    "phpt",
+    "phtml",
+    "txt",
+    "ps1",
+    "psm1",
+    "psd1",
+    "proto",
+    "protodevel",
+    "pb.txt",
+    "proto.text",
+    "textpb",
+    "pbtxt",
+    "prototxt",
+    "pp",
+    "epp",
+    "purs",
+    "py",
+    "py3",
+    "pyw",
+    "pyi",
+    "pyx",
+    "pyx.in",
+    "pxd",
+    "pxd.in",
+    "pxi",
+    "pxi.in",
+    "rpy",
+    "cpy",
+    "SConstruct",
+    "Sconstruct",
+    "sconstruct",
+    "SConscript",
+    "gyp",
+    "gypi",
+    "Snakefile",
+    "wscript",
+    "R",
+    "r",
+    "s",
+    "S",
+    "Rprofile",
+    "rd",
+    "re",
+    "rst",
+    "rest",
+    "robot",
+    "rb",
+    "Appfile",
+    "Appraisals",
+    "Berksfile",
+    "Brewfile",
+    "capfile",
+    "cgi",
+    "Cheffile",
+    "config.ru",
+    "Deliverfile",
+    "Fastfile",
+    "fcgi",
+    "Gemfile",
+    "gemspec",
+    "Guardfile",
+    "irbrc",
+    "jbuilder",
+    "Podfile",
+    "podspec",
+    "prawn",
+    "rabl",
+    "rake",
+    "Rakefile",
+    "Rantfile",
+    "rbx",
+    "rjs",
+    "ruby.rail",
+    "Scanfile",
+    "simplecov",
+    "Snapfile",
+    "thor",
+    "Thorfile",
+    "Vagrantfile",
+    "haml",
+    "sass",
+    "rxml",
+    "builder",
+    "rs",
+    "scala",
+    "sbt",
+    "sql",
+    "ddl",
+    "dml",
+    "erbsql",
+    "sql.erb",
+    "swift",
+    "log",
+    "tcl",
+    "tf",
+    "tfvars",
+    "hcl",
+    "sty",
+    "cls",
+    "textile",
+    "toml",
+    "tml",
+    "Cargo.lock",
+    "Gopkg.lock",
+    "Pipfile",
+    "ts",
+    "tsx",
+    "varlink",
+    "vim",
+    ".vimrc",
+    "xml",
+    "xsd",
+    "xslt",
+    "tld",
+    "dtml",
+    "rss",
+    "opml",
+    "svg",
+    "yaml",
+    "yml",
+    "sublime-syntax",
+];
--- a/crates/nu-cli/src/commands/enter.rs
+++ b/crates/nu-cli/src/commands/enter.rs
@ -121,21 +121,16 @@ async fn enter(

        let full_path = std::path::PathBuf::from(cwd);

-        let (file_extension, contents, contents_tag) = crate::commands::open::fetch(
+        let (file_extension, tagged_contents) = crate::commands::open::fetch(
            &full_path,
            &PathBuf::from(location_clone),
            tag.span,
-            match encoding {
-                Some(e) => e.to_string(),
-                _ => "".to_string(),
-            },
+            encoding,
        )
        .await?;

-        match contents {
+        match tagged_contents.value {
            UntaggedValue::Primitive(Primitive::String(_)) => {
-                let tagged_contents = contents.into_value(&contents_tag);
-
                if let Some(extension) = file_extension {
                    let command_name = format!("from {}", extension);
                    if let Some(converter) = registry.get_command(&command_name) {
@ -156,18 +151,18 @@ async fn enter(
                                scope: scope.clone(),
                            },
                        };
+                        let tag = tagged_contents.tag.clone();
                        let mut result = converter
                            .run(new_args.with_input(vec![tagged_contents]), &registry)
                            .await?;
                        let result_vec: Vec<Result<ReturnSuccess, ShellError>> =
                            result.drain_vec().await;
-
                        Ok(futures::stream::iter(result_vec.into_iter().map(
                            move |res| match res {
                                Ok(ReturnSuccess::Value(Value { value, .. })) => Ok(
                                    ReturnSuccess::Action(CommandAction::EnterValueShell(Value {
                                        value,
-                                        tag: contents_tag.clone(),
+                                        tag: tag.clone(),
                                    })),
                                ),
                                x => x,
@ -185,13 +180,9 @@ async fn enter(
                    )))
                }
            }
-            _ => {
-                let tagged_contents = contents.into_value(contents_tag);
-
-                Ok(OutputStream::one(ReturnSuccess::action(
-                    CommandAction::EnterValueShell(tagged_contents),
-                )))
-            }
+            _ => Ok(OutputStream::one(ReturnSuccess::action(
+                CommandAction::EnterValueShell(tagged_contents),
+            ))),
        }
    }
 }
--- a/crates/nu-cli/src/commands/open.rs
+++ b/crates/nu-cli/src/commands/open.rs
@ -1,15 +1,17 @@
+use crate::commands::classified::maybe_text_codec::{MaybeTextCodec, StringOrBinary};
 use crate::commands::WholeStreamCommand;
 use crate::prelude::*;
+use futures_codec::FramedRead;
 use nu_errors::ShellError;
-use nu_protocol::{CommandAction, ReturnSuccess, Signature, SyntaxShape, UntaggedValue};
+use nu_protocol::{CommandAction, ReturnSuccess, Signature, SyntaxShape, UntaggedValue, Value};
 use nu_source::{AnchorLocation, Span, Tagged};
-use std::path::{Path, PathBuf};
+use std::path::PathBuf;
 extern crate encoding_rs;
+use crate::commands::constants::BAT_LANGUAGES;
 use encoding_rs::*;
+use futures::prelude::*;
+use log::debug;
 use std::fs::File;
-use std::io::BufWriter;
-use std::io::Read;
-use std::io::Write;

 pub struct Open;

@ -81,23 +83,25 @@ documentation link at https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics"#
    }
 }

-pub fn get_encoding(opt: Option<String>) -> &'static Encoding {
+pub fn get_encoding(opt: Option<Tagged<String>>) -> Result<&'static Encoding, ShellError> {
    match opt {
-        None => UTF_8,
-        Some(label) => match Encoding::for_label((&label).as_bytes()) {
-            None => {
-                //print!("{} is not a known encoding label. Trying UTF-8.", label);
-                //std::process::exit(-2);
-                get_encoding(Some("utf-8".to_string()))
-            }
-            Some(encoding) => encoding,
+        None => Ok(UTF_8),
+        Some(label) => match Encoding::for_label((&label.item).as_bytes()) {
+            None => Err(ShellError::labeled_error(
+                format!(
+                    r#"{} is not a valid encoding, refer to https://docs.rs/encoding_rs/0.8.23/encoding_rs/#statics for a valid list of encodings"#,
+                    label.item
+                ),
+                "invalid encoding",
+                label.span(),
+            )),
+            Some(encoding) => Ok(encoding),
        },
    }
 }

 async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStream, ShellError> {
    let cwd = PathBuf::from(args.shell_manager.path());
-    let full_path = cwd;
    let registry = registry.clone();

    let (
@ -108,329 +112,135 @@ async fn open(args: CommandArgs, registry: &CommandRegistry) -> Result<OutputStr
        },
        _,
    ) = args.process(&registry).await?;
-    let enc = match encoding {
-        Some(e) => e.to_string(),
-        _ => "".to_string(),
-    };
-    let result = fetch(&full_path, &path.item, path.tag.span, enc).await;

-    let (file_extension, contents, contents_tag) = result?;
+    // TODO: Remove once Streams are supported everywhere!
+    // As a short term workaround for getting AutoConvert and Bat functionality (Those don't currently support Streams)

-    let file_extension = if raw.item {
+    // Check if the extension has a "from *" command OR "bat" supports syntax highlighting
+    // AND the user doesn't want the raw output
+    // In these cases, we will collect the Stream
+    let ext = if raw.item {
        None
    } else {
-        // If the extension could not be determined via mimetype, try to use the path
-        // extension. Some file types do not declare their mimetypes (such as bson files).
-        file_extension.or_else(|| path.extension().map(|x| x.to_string_lossy().to_string()))
+        path.extension()
+            .map(|name| name.to_string_lossy().to_string())
    };

-    let tagged_contents = contents.into_value(&contents_tag);
-
-    if let Some(extension) = file_extension {
-        Ok(OutputStream::one(ReturnSuccess::action(
-            CommandAction::AutoConvert(tagged_contents, extension),
-        )))
-    } else {
-        Ok(OutputStream::one(ReturnSuccess::value(tagged_contents)))
+    if let Some(ext) = ext {
+        // Check if we have a conversion command
+        if let Some(_command) = registry.get_command(&format!("from {}", ext)) {
+            let (_, tagged_contents) = crate::commands::open::fetch(
+                &cwd,
+                &PathBuf::from(&path.item),
+                path.tag.span,
+                encoding,
+            )
+            .await?;
+            return Ok(OutputStream::one(ReturnSuccess::action(
+                CommandAction::AutoConvert(tagged_contents, ext),
+            )));
+        }
+        // Check if bat does syntax highlighting
+        if BAT_LANGUAGES.contains(&ext.as_ref()) {
+            let (_, tagged_contents) = crate::commands::open::fetch(
+                &cwd,
+                &PathBuf::from(&path.item),
+                path.tag.span,
+                encoding,
+            )
+            .await?;
+            return Ok(OutputStream::one(ReturnSuccess::value(tagged_contents)));
+        }
    }
+
+    // Normal Streaming operation
+    let with_encoding = if encoding.is_none() {
+        None
+    } else {
+        Some(get_encoding(encoding)?)
+    };
+    let f = File::open(&path).map_err(|e| {
+        ShellError::labeled_error(
+            format!("Error opening file: {:?}", e),
+            "Error opening file",
+            path.span(),
+        )
+    })?;
+    let async_reader = futures::io::AllowStdIo::new(f);
+    let sob_stream = FramedRead::new(async_reader, MaybeTextCodec::new(with_encoding))
+        .map_err(|e| ShellError::unexpected(format!("AsyncRead failed in open function: {:?}", e)))
+        .into_stream();
+
+    let final_stream = sob_stream.map(|x| match x {
+        Ok(StringOrBinary::String(s)) => {
+            ReturnSuccess::value(UntaggedValue::string(s).into_untagged_value())
+        }
+        Ok(StringOrBinary::Binary(b)) => ReturnSuccess::value(
+            UntaggedValue::binary(b.into_iter().collect()).into_untagged_value(),
+        ),
+        Err(se) => Err(se),
+    });
+
+    Ok(OutputStream::new(final_stream))
 }

+// Note that we do not output a Stream in "fetch" since it is only used by the "enter" command,
+// which we expect to use a concrete Value and not a Stream
 pub async fn fetch(
    cwd: &PathBuf,
    location: &PathBuf,
    span: Span,
-    encoding: String,
-) -> Result<(Option<String>, UntaggedValue, Tag), ShellError> {
+    encoding_choice: Option<Tagged<String>>,
+) -> Result<(Option<String>, Value), ShellError> {
+    // TODO: I don't understand the point of this? Maybe for better error reporting
    let mut cwd = cwd.clone();
-    let output_encoding: &Encoding = get_encoding(Some("utf-8".to_string()));
-    let input_encoding: &Encoding = get_encoding(Some(encoding.clone()));
-    let mut decoder = input_encoding.new_decoder();
-    let mut encoder = output_encoding.new_encoder();
-    let mut _file: File;
-    let buf = Vec::new();
-    let mut bufwriter = BufWriter::new(buf);
-
-    cwd.push(Path::new(location));
-    if let Ok(cwd) = dunce::canonicalize(&cwd) {
-        if !encoding.is_empty() {
-            // use the encoding string
-            match File::open(&Path::new(&cwd)) {
-                Ok(mut _file) => {
-                    convert_via_utf8(
-                        &mut decoder,
-                        &mut encoder,
-                        &mut _file,
-                        &mut bufwriter,
-                        false,
-                    );
-                    //bufwriter.flush()?;
-                    Ok((
-                        cwd.extension()
-                            .map(|name| name.to_string_lossy().to_string()),
-                        UntaggedValue::string(String::from_utf8_lossy(&bufwriter.buffer())),
-                        Tag {
-                            span,
-                            anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
-                        },
-                    ))
-                }
-                Err(_) => Err(ShellError::labeled_error(
-                    format!("Cannot open {:?} for reading.", &cwd),
-                    "file not found",
-                    span,
-                )),
-            }
-        } else {
-            // Do the old stuff
-            match std::fs::read(&cwd) {
-                Ok(bytes) => match std::str::from_utf8(&bytes) {
-                    Ok(s) => Ok((
-                        cwd.extension()
-                            .map(|name| name.to_string_lossy().to_string()),
-                        UntaggedValue::string(s),
-                        Tag {
-                            span,
-                            anchor: Some(AnchorLocation::File(cwd.to_string_lossy().to_string())),
-                        },
-                    )),
-                    Err(_) => {
-                        //Non utf8 data.
-                        match (bytes.get(0), bytes.get(1)) {
-                            (Some(x), Some(y)) if *x == 0xff && *y == 0xfe => {
-                                // Possibly UTF-16 little endian
-                                let utf16 = read_le_u16(&bytes[2..]);
-
-                                if let Some(utf16) = utf16 {
-                                    match std::string::String::from_utf16(&utf16) {
-                                        Ok(s) => Ok((
-                                            cwd.extension()
-                                                .map(|name| name.to_string_lossy().to_string()),
-                                            UntaggedValue::string(s),
-                                            Tag {
-                                                span,
-                                                anchor: Some(AnchorLocation::File(
-                                                    cwd.to_string_lossy().to_string(),
-                                                )),
-                                            },
-                                        )),
-                                        Err(_) => Ok((
-                                            None,
-                                            UntaggedValue::binary(bytes),
-                                            Tag {
-                                                span,
-                                                anchor: Some(AnchorLocation::File(
-                                                    cwd.to_string_lossy().to_string(),
-                                                )),
-                                            },
-                                        )),
-                                    }
-                                } else {
-                                    Ok((
-                                        None,
-                                        UntaggedValue::binary(bytes),
-                                        Tag {
-                                            span,
-                                            anchor: Some(AnchorLocation::File(
-                                                cwd.to_string_lossy().to_string(),
-                                            )),
-                                        },
-                                    ))
-                                }
-                            }
-                            (Some(x), Some(y)) if *x == 0xfe && *y == 0xff => {
-                                // Possibly UTF-16 big endian
-                                let utf16 = read_be_u16(&bytes[2..]);
-
-                                if let Some(utf16) = utf16 {
-                                    match std::string::String::from_utf16(&utf16) {
-                                        Ok(s) => Ok((
-                                            cwd.extension()
-                                                .map(|name| name.to_string_lossy().to_string()),
-                                            UntaggedValue::string(s),
-                                            Tag {
-                                                span,
-                                                anchor: Some(AnchorLocation::File(
-                                                    cwd.to_string_lossy().to_string(),
-                                                )),
-                                            },
-                                        )),
-                                        Err(_) => Ok((
-                                            None,
-                                            UntaggedValue::binary(bytes),
-                                            Tag {
-                                                span,
-                                                anchor: Some(AnchorLocation::File(
-                                                    cwd.to_string_lossy().to_string(),
-                                                )),
-                                            },
-                                        )),
-                                    }
-                                } else {
-                                    Ok((
-                                        None,
-                                        UntaggedValue::binary(bytes),
-                                        Tag {
-                                            span,
-                                            anchor: Some(AnchorLocation::File(
-                                                cwd.to_string_lossy().to_string(),
-                                            )),
-                                        },
-                                    ))
-                                }
-                            }
-                            _ => Ok((
-                                None,
-                                UntaggedValue::binary(bytes),
-                                Tag {
-                                    span,
-                                    anchor: Some(AnchorLocation::File(
-                                        cwd.to_string_lossy().to_string(),
-                                    )),
-                                },
-                            )),
-                        }
-                    }
-                },
-                Err(_) => Err(ShellError::labeled_error(
-                    format!("Cannot open {:?} for reading.", &cwd),
-                    "file not found",
-                    span,
-                )),
-            }
-        }
-    } else {
-        Err(ShellError::labeled_error(
-            format!("Cannot open {:?} for reading.", &cwd),
-            "file not found",
+    cwd.push(location);
+    let nice_location = dunce::canonicalize(&cwd).map_err(|e| {
+        ShellError::labeled_error(
+            format!("Cannot canonicalize file {:?} because {:?}", &cwd, e),
+            "Cannot canonicalize",
            span,
-        ))
-    }
-}
+        )
+    })?;

-fn convert_via_utf8(
-    decoder: &mut Decoder,
-    encoder: &mut Encoder,
-    read: &mut dyn Read,
-    write: &mut dyn Write,
-    last: bool,
-) {
-    let mut input_buffer = [0u8; 2048];
-    let mut intermediate_buffer_bytes = [0u8; 4096];
-    // Is there a safe way to create a stack-allocated &mut str?
-    let mut intermediate_buffer: &mut str =
-        //unsafe { std::mem::transmute(&mut intermediate_buffer_bytes[..]) };
-        std::str::from_utf8_mut(&mut intermediate_buffer_bytes[..]).expect("error with from_utf8_mut");
-    let mut output_buffer = [0u8; 4096];
-    let mut current_input_ended = false;
-    while !current_input_ended {
-        match read.read(&mut input_buffer) {
-            Err(_) => {
-                print!("Error reading input.");
-                //std::process::exit(-5);
-            }
-            Ok(decoder_input_end) => {
-                current_input_ended = decoder_input_end == 0;
-                let input_ended = last && current_input_ended;
-                let mut decoder_input_start = 0usize;
-                loop {
-                    let (decoder_result, decoder_read, decoder_written, _) = decoder.decode_to_str(
-                        &input_buffer[decoder_input_start..decoder_input_end],
-                        &mut intermediate_buffer,
-                        input_ended,
-                    );
-                    decoder_input_start += decoder_read;
+    // The extension may be used in AutoConvert later on
+    let ext = location
+        .extension()
+        .map(|name| name.to_string_lossy().to_string());

-                    let last_output = if input_ended {
-                        match decoder_result {
-                            CoderResult::InputEmpty => true,
-                            CoderResult::OutputFull => false,
-                        }
-                    } else {
-                        false
-                    };
+    // The tag that will be used when returning a Value
+    let file_tag = Tag {
+        span,
+        anchor: Some(AnchorLocation::File(
+            nice_location.to_string_lossy().to_string(),
+        )),
+    };

-                    // Regardless of whether the intermediate buffer got full
-                    // or the input buffer was exhausted, let's process what's
-                    // in the intermediate buffer.
+    let res = std::fs::read(location)?;

-                    if encoder.encoding() == UTF_8 {
-                        // If the target is UTF-8, optimize out the encoder.
-                        if write
-                            .write_all(&intermediate_buffer.as_bytes()[..decoder_written])
-                            .is_err()
-                        {
-                            print!("Error writing output.");
-                            //std::process::exit(-7);
-                        }
-                    } else {
-                        let mut encoder_input_start = 0usize;
-                        loop {
-                            let (encoder_result, encoder_read, encoder_written, _) = encoder
-                                .encode_from_utf8(
-                                    &intermediate_buffer[encoder_input_start..decoder_written],
-                                    &mut output_buffer,
-                                    last_output,
-                                );
-                            encoder_input_start += encoder_read;
-                            if write.write_all(&output_buffer[..encoder_written]).is_err() {
-                                print!("Error writing output.");
-                                //std::process::exit(-6);
-                            }
-                            match encoder_result {
-                                CoderResult::InputEmpty => {
-                                    break;
-                                }
-                                CoderResult::OutputFull => {
-                                    continue;
-                                }
-                            }
-                        }
-                    }
-
-                    // Now let's see if we should read again or process the
-                    // rest of the current input buffer.
-                    match decoder_result {
-                        CoderResult::InputEmpty => {
-                            break;
-                        }
-                        CoderResult::OutputFull => {
-                            continue;
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-fn read_le_u16(input: &[u8]) -> Option<Vec<u16>> {
-    if input.len() % 2 != 0 || input.len() < 2 {
-        None
+    // If no encoding is provided we default to UTF-8 (no guessing; a BOM may still override it when decoding below)
+    let encoding = if encoding_choice.is_none() {
+        UTF_8
    } else {
-        let mut result = vec![];
-        let mut pos = 0;
-        while pos < input.len() {
-            result.push(u16::from_le_bytes([input[pos], input[pos + 1]]));
-            pos += 2;
-        }
+        get_encoding(encoding_choice.clone())?
+    };

-        Some(result)
-    }
-}
-
-fn read_be_u16(input: &[u8]) -> Option<Vec<u16>> {
-    if input.len() % 2 != 0 || input.len() < 2 {
-        None
+    // If the user specified an encoding, then do not do BOM sniffing
+    let decoded_res = if encoding_choice.is_some() {
+        let (cow_res, _replacements) = encoding.decode_with_bom_removal(&res);
+        cow_res
    } else {
-        let mut result = vec![];
-        let mut pos = 0;
-        while pos < input.len() {
-            result.push(u16::from_be_bytes([input[pos], input[pos + 1]]));
-            pos += 2;
+        // Otherwise, use the default UTF-8 encoder with BOM sniffing
+        let (cow_res, actual_encoding, replacements) = encoding.decode(&res);
+        // If we had to use replacement characters then fallback to binary
+        if replacements {
+            return Ok((ext, UntaggedValue::binary(res).into_value(file_tag)));
        }
-
-        Some(result)
-    }
+        debug!("Decoded using {:?}", actual_encoding);
+        cow_res
+    };
+    let v = UntaggedValue::string(decoded_res.to_string()).into_value(file_tag);
+    Ok((ext, v))
 }

 #[cfg(test)]
--- a/crates/nu-cli/tests/commands/enter.rs
+++ b/crates/nu-cli/tests/commands/enter.rs
@ -80,7 +80,6 @@ fn errors_if_file_not_found() {
            "enter i_dont_exist.csv"
        );

-        //assert!(actual.err.contains("File could not be opened"));
-        assert!(actual.err.contains("file not found"));
+        assert!(actual.err.contains("Cannot canonicalize"));
    })
 }
--- a/crates/nu-cli/tests/commands/open.rs
+++ b/crates/nu-cli/tests/commands/open.rs
@ -224,7 +224,11 @@ fn errors_if_file_not_found() {
        cwd: "tests/fixtures/formats",
        "open i_dont_exist.txt"
    );
-
-    //assert!(actual.err.contains("File could not be opened"));
-    assert!(actual.err.contains("Cannot open"));
+    let expected = "Cannot canonicalize";
+    assert!(
+        actual.err.contains(expected),
+        "Error:\n{}\ndoes not contain{}",
+        actual.err,
+        expected
+    );
 }