mirror of
https://github.com/nushell/nushell
synced 2024-12-27 21:43:09 +00:00
Fix internal panic for query web
(#13507)
<!-- if this PR closes one or more issues, you can automatically link the PR with them by using one of the [*linking keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword), e.g. - this PR should close #xxxx - fixes #xxxx you can also mention related issues, PRs or discussions! --> # Description <!-- Thank you for improving Nushell. Please, check our [contributing guide](../CONTRIBUTING.md) and talk to the core team before making major changes. Description of your pull request goes here. **Provide examples and/or screenshots** if your changes affect the user experience. --> Original implementation contains multiple `expect` which can cause internal runtime panic. This pr forks the `css` selector impl and make it return an error that nushell can recognize. **Note:** The original impl is still used in pre-defined selector implementations, but they should never fail, so the `css` fn is preserved. Closes #13496. # User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> Now `query web` will not panic when the `query` parameter is not given or has syntax error. ```plain ❯ .\target\debug\nu.exe -c "http get https://www.rust-lang.org | query web" Error: × CSS query parse error ╭─[source:1:38] 1 │ http get https://www.rust-lang.org | query web · ────┬──── · ╰─┤ Unexpected error occurred. Please report this to the developer · │ EmptySelector ╰──── help: cannot parse query as a valid CSS selector ``` # Tests + Formatting <!-- Don't forget to add tests that cover your changes. Make sure you've run and fixed any issues with these commands: --> - [x] `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - [x] `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - [x] `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - [x] `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the tests for the standard library <!-- > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. --> - [x] Impl change, no doc update. --------- Co-authored-by: Stefan Holderbach <sholderbach@users.noreply.github.com>
This commit is contained in:
parent
ed82f9ee18
commit
af34d5c062
1 changed files with 91 additions and 70 deletions
|
@ -99,29 +99,11 @@ pub fn web_examples() -> Vec<Example<'static>> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Selector {
|
pub struct Selector {
|
||||||
pub query: String,
|
pub query: Spanned<String>,
|
||||||
pub as_html: bool,
|
pub as_html: bool,
|
||||||
pub attribute: Value,
|
pub attribute: Value,
|
||||||
pub as_table: Value,
|
pub as_table: Value,
|
||||||
pub inspect: bool,
|
pub inspect: Spanned<bool>,
|
||||||
}
|
|
||||||
|
|
||||||
impl Selector {
|
|
||||||
pub fn new() -> Selector {
|
|
||||||
Selector {
|
|
||||||
query: String::new(),
|
|
||||||
as_html: false,
|
|
||||||
attribute: Value::string("".to_string(), Span::unknown()),
|
|
||||||
as_table: Value::string("".to_string(), Span::unknown()),
|
|
||||||
inspect: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for Selector {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self::new()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse_selector_params(call: &EvaluatedCall, input: &Value) -> Result<Value, LabeledError> {
|
pub fn parse_selector_params(call: &EvaluatedCall, input: &Value) -> Result<Value, LabeledError> {
|
||||||
|
@ -136,43 +118,46 @@ pub fn parse_selector_params(call: &EvaluatedCall, input: &Value) -> Result<Valu
|
||||||
.unwrap_or_else(|| Value::nothing(head));
|
.unwrap_or_else(|| Value::nothing(head));
|
||||||
|
|
||||||
let inspect = call.has_flag("inspect")?;
|
let inspect = call.has_flag("inspect")?;
|
||||||
|
let inspect_span = call.get_flag_span("inspect").unwrap_or(call.head);
|
||||||
if let Some(query) = &query {
|
|
||||||
if let Err(err) = ScraperSelector::parse(&query.item) {
|
|
||||||
return Err(LabeledError::new("CSS query parse error")
|
|
||||||
.with_label(err.to_string(), query.span)
|
|
||||||
.with_help("cannot parse this query as a valid CSS selector"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let selector = Selector {
|
let selector = Selector {
|
||||||
query: query.map(|q| q.item).unwrap_or_default(),
|
query: query.unwrap_or(Spanned {
|
||||||
|
span: call.head,
|
||||||
|
item: "".to_owned(),
|
||||||
|
}),
|
||||||
as_html,
|
as_html,
|
||||||
attribute,
|
attribute,
|
||||||
as_table,
|
as_table,
|
||||||
inspect,
|
inspect: Spanned {
|
||||||
|
item: inspect,
|
||||||
|
span: inspect_span,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
let span = input.span();
|
let span = input.span();
|
||||||
match input {
|
match input {
|
||||||
Value::String { val, .. } => Ok(begin_selector_query(val.to_string(), selector, span)),
|
Value::String { val, .. } => begin_selector_query(val.to_string(), selector, span),
|
||||||
_ => Err(LabeledError::new("Requires text input")
|
_ => Err(LabeledError::new("Requires text input")
|
||||||
.with_label("expected text from pipeline", span)),
|
.with_label("expected text from pipeline", span)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn begin_selector_query(input_html: String, selector: Selector, span: Span) -> Value {
|
fn begin_selector_query(
|
||||||
|
input_html: String,
|
||||||
|
selector: Selector,
|
||||||
|
span: Span,
|
||||||
|
) -> Result<Value, LabeledError> {
|
||||||
if let Value::List { .. } = selector.as_table {
|
if let Value::List { .. } = selector.as_table {
|
||||||
return retrieve_tables(
|
retrieve_tables(
|
||||||
input_html.as_str(),
|
input_html.as_str(),
|
||||||
&selector.as_table,
|
&selector.as_table,
|
||||||
selector.inspect,
|
selector.inspect.item,
|
||||||
span,
|
span,
|
||||||
);
|
)
|
||||||
} else if selector.attribute.is_empty() {
|
} else if selector.attribute.is_empty() {
|
||||||
execute_selector_query(
|
execute_selector_query(
|
||||||
input_html.as_str(),
|
input_html.as_str(),
|
||||||
selector.query.as_str(),
|
selector.query,
|
||||||
selector.as_html,
|
selector.as_html,
|
||||||
selector.inspect,
|
selector.inspect,
|
||||||
span,
|
span,
|
||||||
|
@ -180,7 +165,7 @@ fn begin_selector_query(input_html: String, selector: Selector, span: Span) -> V
|
||||||
} else if let Value::List { .. } = selector.attribute {
|
} else if let Value::List { .. } = selector.attribute {
|
||||||
execute_selector_query_with_attributes(
|
execute_selector_query_with_attributes(
|
||||||
input_html.as_str(),
|
input_html.as_str(),
|
||||||
selector.query.as_str(),
|
selector.query,
|
||||||
&selector.attribute,
|
&selector.attribute,
|
||||||
selector.inspect,
|
selector.inspect,
|
||||||
span,
|
span,
|
||||||
|
@ -188,7 +173,7 @@ fn begin_selector_query(input_html: String, selector: Selector, span: Span) -> V
|
||||||
} else {
|
} else {
|
||||||
execute_selector_query_with_attribute(
|
execute_selector_query_with_attribute(
|
||||||
input_html.as_str(),
|
input_html.as_str(),
|
||||||
selector.query.as_str(),
|
selector.query,
|
||||||
selector.attribute.as_str().unwrap_or(""),
|
selector.attribute.as_str().unwrap_or(""),
|
||||||
selector.inspect,
|
selector.inspect,
|
||||||
span,
|
span,
|
||||||
|
@ -201,7 +186,7 @@ pub fn retrieve_tables(
|
||||||
columns: &Value,
|
columns: &Value,
|
||||||
inspect_mode: bool,
|
inspect_mode: bool,
|
||||||
span: Span,
|
span: Span,
|
||||||
) -> Value {
|
) -> Result<Value, LabeledError> {
|
||||||
let html = input_string;
|
let html = input_string;
|
||||||
let mut cols: Vec<String> = Vec::new();
|
let mut cols: Vec<String> = Vec::new();
|
||||||
if let Value::List { vals, .. } = &columns {
|
if let Value::List { vals, .. } = &columns {
|
||||||
|
@ -228,11 +213,15 @@ pub fn retrieve_tables(
|
||||||
};
|
};
|
||||||
|
|
||||||
if tables.len() == 1 {
|
if tables.len() == 1 {
|
||||||
return retrieve_table(
|
return Ok(retrieve_table(
|
||||||
tables.into_iter().next().expect("Error retrieving table"),
|
tables.into_iter().next().ok_or_else(|| {
|
||||||
|
LabeledError::new("Cannot retrieve table")
|
||||||
|
.with_label("Error retrieving table.", span)
|
||||||
|
.with_help("No table found.")
|
||||||
|
})?,
|
||||||
columns,
|
columns,
|
||||||
span,
|
span,
|
||||||
);
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let vals = tables
|
let vals = tables
|
||||||
|
@ -240,7 +229,7 @@ pub fn retrieve_tables(
|
||||||
.map(move |table| retrieve_table(table, columns, span))
|
.map(move |table| retrieve_table(table, columns, span))
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
Value::list(vals, span)
|
Ok(Value::list(vals, span))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn retrieve_table(mut table: WebTable, columns: &Value, span: Span) -> Value {
|
fn retrieve_table(mut table: WebTable, columns: &Value, span: Span) -> Value {
|
||||||
|
@ -323,15 +312,15 @@ fn retrieve_table(mut table: WebTable, columns: &Value, span: Span) -> Value {
|
||||||
|
|
||||||
fn execute_selector_query_with_attribute(
|
fn execute_selector_query_with_attribute(
|
||||||
input_string: &str,
|
input_string: &str,
|
||||||
query_string: &str,
|
query_string: Spanned<String>,
|
||||||
attribute: &str,
|
attribute: &str,
|
||||||
inspect: bool,
|
inspect: Spanned<bool>,
|
||||||
span: Span,
|
span: Span,
|
||||||
) -> Value {
|
) -> Result<Value, LabeledError> {
|
||||||
let doc = Html::parse_fragment(input_string);
|
let doc = Html::parse_fragment(input_string);
|
||||||
|
|
||||||
let vals: Vec<Value> = doc
|
let vals: Vec<Value> = doc
|
||||||
.select(&css(query_string, inspect))
|
.select(&fallible_css(query_string, inspect)?)
|
||||||
.map(|selection| {
|
.map(|selection| {
|
||||||
Value::string(
|
Value::string(
|
||||||
selection.value().attr(attribute).unwrap_or("").to_string(),
|
selection.value().attr(attribute).unwrap_or("").to_string(),
|
||||||
|
@ -339,16 +328,16 @@ fn execute_selector_query_with_attribute(
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
Value::list(vals, span)
|
Ok(Value::list(vals, span))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn execute_selector_query_with_attributes(
|
fn execute_selector_query_with_attributes(
|
||||||
input_string: &str,
|
input_string: &str,
|
||||||
query_string: &str,
|
query_string: Spanned<String>,
|
||||||
attributes: &Value,
|
attributes: &Value,
|
||||||
inspect: bool,
|
inspect: Spanned<bool>,
|
||||||
span: Span,
|
span: Span,
|
||||||
) -> Value {
|
) -> Result<Value, LabeledError> {
|
||||||
let doc = Html::parse_fragment(input_string);
|
let doc = Html::parse_fragment(input_string);
|
||||||
|
|
||||||
let mut attrs: Vec<String> = Vec::new();
|
let mut attrs: Vec<String> = Vec::new();
|
||||||
|
@ -361,7 +350,7 @@ fn execute_selector_query_with_attributes(
|
||||||
}
|
}
|
||||||
|
|
||||||
let vals: Vec<Value> = doc
|
let vals: Vec<Value> = doc
|
||||||
.select(&css(query_string, inspect))
|
.select(&fallible_css(query_string, inspect)?)
|
||||||
.map(|selection| {
|
.map(|selection| {
|
||||||
let mut record = Record::new();
|
let mut record = Record::new();
|
||||||
for attr in &attrs {
|
for attr in &attrs {
|
||||||
|
@ -373,25 +362,25 @@ fn execute_selector_query_with_attributes(
|
||||||
Value::record(record, span)
|
Value::record(record, span)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
Value::list(vals, span)
|
Ok(Value::list(vals, span))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn execute_selector_query(
|
fn execute_selector_query(
|
||||||
input_string: &str,
|
input_string: &str,
|
||||||
query_string: &str,
|
query_string: Spanned<String>,
|
||||||
as_html: bool,
|
as_html: bool,
|
||||||
inspect: bool,
|
inspect: Spanned<bool>,
|
||||||
span: Span,
|
span: Span,
|
||||||
) -> Value {
|
) -> Result<Value, LabeledError> {
|
||||||
let doc = Html::parse_fragment(input_string);
|
let doc = Html::parse_fragment(input_string);
|
||||||
|
|
||||||
let vals: Vec<Value> = match as_html {
|
let vals: Vec<Value> = match as_html {
|
||||||
true => doc
|
true => doc
|
||||||
.select(&css(query_string, inspect))
|
.select(&fallible_css(query_string, inspect)?)
|
||||||
.map(|selection| Value::string(selection.html(), span))
|
.map(|selection| Value::string(selection.html(), span))
|
||||||
.collect(),
|
.collect(),
|
||||||
false => doc
|
false => doc
|
||||||
.select(&css(query_string, inspect))
|
.select(&fallible_css(query_string, inspect)?)
|
||||||
.map(|selection| {
|
.map(|selection| {
|
||||||
Value::list(
|
Value::list(
|
||||||
selection
|
selection
|
||||||
|
@ -404,7 +393,28 @@ fn execute_selector_query(
|
||||||
.collect(),
|
.collect(),
|
||||||
};
|
};
|
||||||
|
|
||||||
Value::list(vals, span)
|
Ok(Value::list(vals, span))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fallible_css(
|
||||||
|
selector: Spanned<String>,
|
||||||
|
inspect: Spanned<bool>,
|
||||||
|
) -> Result<ScraperSelector, LabeledError> {
|
||||||
|
if inspect.item {
|
||||||
|
ScraperSelector::parse("html").map_err(|e| {
|
||||||
|
LabeledError::new("CSS query parse error")
|
||||||
|
.with_label(e.to_string(), inspect.span)
|
||||||
|
.with_help(
|
||||||
|
"cannot parse query `html` as a valid CSS selector, possibly an internal error",
|
||||||
|
)
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
ScraperSelector::parse(&selector.item).map_err(|e| {
|
||||||
|
LabeledError::new("CSS query parse error")
|
||||||
|
.with_label(e.to_string(), selector.span)
|
||||||
|
.with_help("cannot parse query as a valid CSS selector")
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn css(selector: &str, inspect: bool) -> ScraperSelector {
|
pub fn css(selector: &str, inspect: bool) -> ScraperSelector {
|
||||||
|
@ -433,15 +443,23 @@ mod tests {
|
||||||
<a href="https://example.com" target="_self">Example</a>
|
<a href="https://example.com" target="_self">Example</a>
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
|
fn null_spanned<T: ToOwned + ?Sized>(input: &T) -> Spanned<T::Owned> {
|
||||||
|
Spanned {
|
||||||
|
item: input.to_owned(),
|
||||||
|
span: Span::unknown(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_first_child_is_not_empty() {
|
fn test_first_child_is_not_empty() {
|
||||||
assert!(!execute_selector_query(
|
assert!(!execute_selector_query(
|
||||||
SIMPLE_LIST,
|
SIMPLE_LIST,
|
||||||
"li:first-child",
|
null_spanned("li:first-child"),
|
||||||
false,
|
|
||||||
false,
|
false,
|
||||||
|
null_spanned(&false),
|
||||||
Span::test_data()
|
Span::test_data()
|
||||||
)
|
)
|
||||||
|
.unwrap()
|
||||||
.is_empty())
|
.is_empty())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -449,11 +467,12 @@ mod tests {
|
||||||
fn test_first_child() {
|
fn test_first_child() {
|
||||||
let item = execute_selector_query(
|
let item = execute_selector_query(
|
||||||
SIMPLE_LIST,
|
SIMPLE_LIST,
|
||||||
"li:first-child",
|
null_spanned("li:first-child"),
|
||||||
false,
|
|
||||||
false,
|
false,
|
||||||
|
null_spanned(&false),
|
||||||
Span::test_data(),
|
Span::test_data(),
|
||||||
);
|
)
|
||||||
|
.unwrap();
|
||||||
let config = nu_protocol::Config::default();
|
let config = nu_protocol::Config::default();
|
||||||
let out = item.to_expanded_string("\n", &config);
|
let out = item.to_expanded_string("\n", &config);
|
||||||
assert_eq!("[[Coffee]]".to_string(), out)
|
assert_eq!("[[Coffee]]".to_string(), out)
|
||||||
|
@ -463,11 +482,12 @@ mod tests {
|
||||||
fn test_nested_text_nodes() {
|
fn test_nested_text_nodes() {
|
||||||
let item = execute_selector_query(
|
let item = execute_selector_query(
|
||||||
NESTED_TEXT,
|
NESTED_TEXT,
|
||||||
"p:first-child",
|
null_spanned("p:first-child"),
|
||||||
false,
|
|
||||||
false,
|
false,
|
||||||
|
null_spanned(&false),
|
||||||
Span::test_data(),
|
Span::test_data(),
|
||||||
);
|
)
|
||||||
|
.unwrap();
|
||||||
let out = item
|
let out = item
|
||||||
.into_list()
|
.into_list()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
@ -492,7 +512,7 @@ mod tests {
|
||||||
fn test_multiple_attributes() {
|
fn test_multiple_attributes() {
|
||||||
let item = execute_selector_query_with_attributes(
|
let item = execute_selector_query_with_attributes(
|
||||||
MULTIPLE_ATTRIBUTES,
|
MULTIPLE_ATTRIBUTES,
|
||||||
"a",
|
null_spanned("a"),
|
||||||
&Value::list(
|
&Value::list(
|
||||||
vec![
|
vec![
|
||||||
Value::string("href".to_string(), Span::unknown()),
|
Value::string("href".to_string(), Span::unknown()),
|
||||||
|
@ -500,9 +520,10 @@ mod tests {
|
||||||
],
|
],
|
||||||
Span::unknown(),
|
Span::unknown(),
|
||||||
),
|
),
|
||||||
false,
|
null_spanned(&false),
|
||||||
Span::test_data(),
|
Span::test_data(),
|
||||||
);
|
)
|
||||||
|
.unwrap();
|
||||||
let out = item
|
let out = item
|
||||||
.into_list()
|
.into_list()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|
Loading…
Reference in a new issue