add more helpful error with text/xml (#13609)

# Description

This PR provides a more helpful error message when `http get` receives a
response whose content type can't be parsed.

### Before

![image](https://github.com/user-attachments/assets/4e6176e2-ec35-48d8-acb3-af5d1cda4327)

### After

![image](https://github.com/user-attachments/assets/aa498ef7-f1ca-495b-8790-484593f02e35)
The span isn't perfect but there's no way to get the span of the content
type that I can see.

In the middle of fixing this error, I also discovered how to fix the
problem in general. The new error message complains about double quotes
(char 22 at position 0; 22 hex is `"`), so the fix is just to remove all
the double quotes from the content_type, and then you get this.

### After After

![image](https://github.com/user-attachments/assets/2223d34f-4563-4dea-90eb-83326e808af1)

The consensus in the Discord discussion is that `--raw` or
`--ignore-errors` should swallow this error and "just work", and that the
command should default to text or binary when MIME parsing fails. I agree,
but this PR does not implement that.

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
Darren Schroeder 2024-08-13 14:27:28 -05:00 committed by GitHub
parent a55d172e52
commit 5f45f6c223
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 21 additions and 18 deletions

View file

@ -110,7 +110,7 @@ lsp-types = "0.95.0"
mach2 = "0.4"
md5 = { version = "0.10", package = "md-5" }
miette = "7.2"
mime = "0.3"
mime = "0.3.17"
mime_guess = "2.0"
mockito = { version = "1.5", default-features = false }
multipart-rs = "0.1.11"

View file

@ -50,7 +50,7 @@ encoding_rs = { workspace = true }
fancy-regex = { workspace = true }
filesize = { workspace = true }
filetime = { workspace = true }
fs_extra = { workspace = true }
fs_extra = { workspace = true }
human-date-parser = { workspace = true }
indexmap = { workspace = true }
indicatif = { workspace = true }
@ -88,7 +88,7 @@ sysinfo = { workspace = true }
tabled = { workspace = true, features = ["color"], default-features = false }
terminal_size = { workspace = true }
titlecase = { workspace = true }
toml = { workspace = true, features = ["preserve_order"]}
toml = { workspace = true, features = ["preserve_order"] }
unicode-segmentation = { workspace = true }
ureq = { workspace = true, default-features = false, features = ["charset", "gzip", "json", "native-tls"] }
url = { workspace = true }

View file

@ -4,9 +4,10 @@ use base64::{
engine::{general_purpose::PAD, GeneralPurpose},
Engine,
};
use fancy_regex::Regex;
use multipart_rs::MultipartWriter;
use nu_engine::command_prelude::*;
use nu_protocol::{ByteStream, Signals};
use nu_protocol::{ByteStream, LabeledError, Signals};
use std::{
collections::HashMap,
io::Cursor,
@ -560,23 +561,25 @@ fn transform_response_using_content_type(
resp: Response,
content_type: &str,
) -> Result<PipelineData, ShellError> {
let content_type =
mime::Mime::from_str(content_type).map_err(|_| ShellError::GenericError {
error: format!("MIME type unknown: {content_type}"),
msg: "".into(),
span: None,
help: Some("given unknown MIME type".into()),
inner: vec![],
})?;
let regex = Regex::new("\"").expect("Failed to create regex");
let content_type_trim = regex.replace_all(content_type, "").to_string();
let content_type = mime::Mime::from_str(&content_type_trim).map_err(|err| {
LabeledError::new(err.to_string())
.with_help("given unknown MIME type, or error parsing MIME type")
.with_label(format!("MIME type unknown: {content_type_trim}"), span)
})?;
let ext = match (content_type.type_(), content_type.subtype()) {
(mime::TEXT, mime::PLAIN) => {
let path_extension = url::Url::parse(requested_url)
.map_err(|_| ShellError::GenericError {
error: format!("Cannot parse URL: {requested_url}"),
msg: "".into(),
span: None,
help: Some("cannot parse".into()),
inner: vec![],
.map_err(|err| {
LabeledError::new(err.to_string())
.with_help("cannot parse")
.with_label(
format!("Cannot parse URL: {requested_url}"),
Span::unknown(),
)
})?
.path_segments()
.and_then(|segments| segments.last())