add more helpful error with text/xml (#13609)

# Description This PR is meant to provide a more helpful error message when using `http get` and the content type can't be parsed. ### Before ![image](https://github.com/user-attachments/assets/4e6176e2-ec35-48d8-acb3-af5d1cda4327) ### After ![image](https://github.com/user-attachments/assets/aa498ef7-f1ca-495b-8790-484593f02e35) The span isn't perfect, but as far as I can see there's no way to get the span of the content type. In the middle of fixing this error, I also discovered how to fix the problem in general, since you can now see the error message complaining about double quotes (char 22 at position 0; hex 22 is `"`). The fix is just to remove all the double quotes from the content_type, and then you get this. ### After After ![image](https://github.com/user-attachments/assets/2223d34f-4563-4dea-90eb-83326e808af1) The discussion on Discord about this is that `--raw` or `--ignore-errors` should eat this error and "just work", as well as default to text or binary when the MIME parsing fails. I agree, but this PR does not implement that. # User-Facing Changes  # Tests + Formatting  # After Submitting
2025-01-13 13:49:21 +00:00 · 2024-08-13 14:27:28 -05:00 · 2024-08-13 14:27:28 -05:00 · 5f45f6c223
commit 5f45f6c223
parent a55d172e52
3 changed files with 21 additions and 18 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -110,7 +110,7 @@ lsp-types = "0.95.0"
 mach2 = "0.4"
 md5 = { version = "0.10", package = "md-5" }
 miette = "7.2"
-mime = "0.3"
+mime = "0.3.17"
 mime_guess = "2.0"
 mockito = { version = "1.5", default-features = false }
 multipart-rs = "0.1.11"
--- a/crates/nu-command/Cargo.toml
+++ b/crates/nu-command/Cargo.toml
@ -50,7 +50,7 @@ encoding_rs = { workspace = true }
 fancy-regex = { workspace = true }
 filesize = { workspace = true }
 filetime = { workspace = true }
-fs_extra = { workspace =  true }
+fs_extra = { workspace = true }
 human-date-parser = { workspace = true }
 indexmap = { workspace = true }
 indicatif = { workspace = true }
@ -88,7 +88,7 @@ sysinfo = { workspace = true }
 tabled = { workspace = true, features = ["color"], default-features = false }
 terminal_size = { workspace = true }
 titlecase = { workspace = true }
-toml = { workspace = true, features = ["preserve_order"]}
+toml = { workspace = true, features = ["preserve_order"] }
 unicode-segmentation = { workspace = true }
 ureq = { workspace = true, default-features = false, features = ["charset", "gzip", "json", "native-tls"] }
 url = { workspace = true }
--- a/crates/nu-command/src/network/http/client.rs
+++ b/crates/nu-command/src/network/http/client.rs
@ -4,9 +4,10 @@ use base64::{
    engine::{general_purpose::PAD, GeneralPurpose},
    Engine,
 };
+use fancy_regex::Regex;
 use multipart_rs::MultipartWriter;
 use nu_engine::command_prelude::*;
-use nu_protocol::{ByteStream, Signals};
+use nu_protocol::{ByteStream, LabeledError, Signals};
 use std::{
    collections::HashMap,
    io::Cursor,
@ -560,23 +561,25 @@ fn transform_response_using_content_type(
    resp: Response,
    content_type: &str,
 ) -> Result<PipelineData, ShellError> {
-    let content_type =
-        mime::Mime::from_str(content_type).map_err(|_| ShellError::GenericError {
-            error: format!("MIME type unknown: {content_type}"),
-            msg: "".into(),
-            span: None,
-            help: Some("given unknown MIME type".into()),
-            inner: vec![],
-        })?;
+    let regex = Regex::new("\"").expect("Failed to create regex");
+    let content_type_trim = regex.replace_all(content_type, "").to_string();
+
+    let content_type = mime::Mime::from_str(&content_type_trim).map_err(|err| {
+        LabeledError::new(err.to_string())
+            .with_help("given unknown MIME type, or error parsing MIME type")
+            .with_label(format!("MIME type unknown: {content_type_trim}"), span)
+    })?;
+
    let ext = match (content_type.type_(), content_type.subtype()) {
        (mime::TEXT, mime::PLAIN) => {
            let path_extension = url::Url::parse(requested_url)
-                .map_err(|_| ShellError::GenericError {
-                    error: format!("Cannot parse URL: {requested_url}"),
-                    msg: "".into(),
-                    span: None,
-                    help: Some("cannot parse".into()),
-                    inner: vec![],
+                .map_err(|err| {
+                    LabeledError::new(err.to_string())
+                        .with_help("cannot parse")
+                        .with_label(
+                            format!("Cannot parse URL: {requested_url}"),
+                            Span::unknown(),
+                        )
                })?
                .path_segments()
                .and_then(|segments| segments.last())