Added support for multiple attributes to query web -a (#13256)

# Description

Allows specifying multiple attributes to retrieve from the selected
nodes. For example, you may want to select both the `href` and the
`target` of every link in a list of links:

```nushell
.... | query web --query a --attribute [href target]
```

# User-Facing Changes

`query web --attribute` previously accepted a string. Now it accepts
either a string or a list of strings.

The shape definition for this flag has been temporarily relaxed to
`SyntaxShape::Any`, until nushell/nushell#13253 is fixed.
This commit is contained in:
Bruce Weirdan 2024-06-28 19:50:20 +02:00 committed by GitHub
parent 720b4cbd01
commit 4f8d82bb88
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -24,10 +24,20 @@ impl SimplePluginCommand for QueryWeb {
.switch("as-html", "return the query output as html", Some('m')) .switch("as-html", "return the query output as html", Some('m'))
.named( .named(
"attribute", "attribute",
SyntaxShape::String, SyntaxShape::Any,
"downselect based on the given attribute", "downselect based on the given attribute",
Some('a'), Some('a'),
) )
// TODO: use detailed shape when https://github.com/nushell/nushell/issues/13253 is resolved
// .named(
// "attribute",
// SyntaxShape::OneOf(vec![
// SyntaxShape::List(Box::new(SyntaxShape::String)),
// SyntaxShape::String,
// ]),
// "downselect based on the given attribute",
// Some('a'),
// )
.named( .named(
"as-table", "as-table",
SyntaxShape::List(Box::new(SyntaxShape::String)), SyntaxShape::List(Box::new(SyntaxShape::String)),
@ -79,6 +89,11 @@ pub fn web_examples() -> Vec<Example<'static>> {
example: "http get https://example.org | query web --query a --attribute href", example: "http get https://example.org | query web --query a --attribute href",
description: "Retrieve a specific html attribute instead of the default text", description: "Retrieve a specific html attribute instead of the default text",
result: None, result: None,
},
Example {
example: r#"http get https://www.rust-lang.org | query web --query 'meta[property^="og:"]' --attribute [ property content ]"#,
description: r#"Retrieve the OpenGraph properties (`<meta property="og:...">`) from a web page"#,
result: None,
} }
] ]
} }
@ -86,7 +101,7 @@ pub fn web_examples() -> Vec<Example<'static>> {
pub struct Selector { pub struct Selector {
pub query: String, pub query: String,
pub as_html: bool, pub as_html: bool,
pub attribute: String, pub attribute: Value,
pub as_table: Value, pub as_table: Value,
pub inspect: bool, pub inspect: bool,
} }
@ -96,7 +111,7 @@ impl Selector {
Selector { Selector {
query: String::new(), query: String::new(),
as_html: false, as_html: false,
attribute: String::new(), attribute: Value::string("".to_string(), Span::unknown()),
as_table: Value::string("".to_string(), Span::unknown()), as_table: Value::string("".to_string(), Span::unknown()),
inspect: false, inspect: false,
} }
@ -113,7 +128,9 @@ pub fn parse_selector_params(call: &EvaluatedCall, input: &Value) -> Result<Valu
let head = call.head; let head = call.head;
let query: Option<Spanned<String>> = call.get_flag("query")?; let query: Option<Spanned<String>> = call.get_flag("query")?;
let as_html = call.has_flag("as-html")?; let as_html = call.has_flag("as-html")?;
let attribute = call.get_flag("attribute")?.unwrap_or_default(); let attribute = call
.get_flag("attribute")?
.unwrap_or_else(|| Value::nothing(head));
let as_table: Value = call let as_table: Value = call
.get_flag("as-table")? .get_flag("as-table")?
.unwrap_or_else(|| Value::nothing(head)); .unwrap_or_else(|| Value::nothing(head));
@ -160,11 +177,19 @@ fn begin_selector_query(input_html: String, selector: Selector, span: Span) -> V
selector.inspect, selector.inspect,
span, span,
) )
} else if let Value::List { .. } = selector.attribute {
execute_selector_query_with_attributes(
input_html.as_str(),
selector.query.as_str(),
&selector.attribute,
selector.inspect,
span,
)
} else { } else {
execute_selector_query_with_attribute( execute_selector_query_with_attribute(
input_html.as_str(), input_html.as_str(),
selector.query.as_str(), selector.query.as_str(),
selector.attribute.as_str(), selector.attribute.as_str().unwrap_or(""),
selector.inspect, selector.inspect,
span, span,
) )
@ -317,6 +342,40 @@ fn execute_selector_query_with_attribute(
Value::list(vals, span) Value::list(vals, span)
} }
/// Runs the CSS selector `query_string` against `input_string` (parsed as an
/// HTML fragment) and returns a list with one record per matched element.
///
/// Each record has one column per requested attribute name, in the order the
/// names were given. An attribute that is absent on the element is reported
/// as an empty string.
///
/// `attributes` is expected to be a list value: its string items are used as
/// attribute names and any non-string items are skipped. If it is not a list,
/// no names are collected and every matched element yields an empty record.
/// `inspect` is forwarded unchanged to `css`.
fn execute_selector_query_with_attributes(
    input_string: &str,
    query_string: &str,
    attributes: &Value,
    inspect: bool,
    span: Span,
) -> Value {
    let doc = Html::parse_fragment(input_string);

    // Collect the requested attribute names up front so the per-element
    // closure below only has to walk a plain list of strings.
    let names: Vec<String> = match attributes {
        Value::List { vals, .. } => vals
            .iter()
            .filter_map(|item| match item {
                Value::String { val, .. } => Some(val.to_string()),
                _ => None,
            })
            .collect(),
        _ => Vec::new(),
    };

    let selector = css(query_string, inspect);
    let rows: Vec<Value> = doc
        .select(&selector)
        .map(|element| {
            let mut row = Record::new();
            for name in &names {
                // Missing attributes fall back to the empty string.
                let found = element.value().attr(name).unwrap_or_default();
                row.push(name.clone(), Value::string(found.to_string(), span));
            }
            Value::record(row, span)
        })
        .collect();

    Value::list(rows, span)
}
fn execute_selector_query( fn execute_selector_query(
input_string: &str, input_string: &str,
query_string: &str, query_string: &str,
@ -369,6 +428,10 @@ mod tests {
"#; "#;
const NESTED_TEXT: &str = r#"<p>Hello there, <span style="color: red;">World</span></p>"#; const NESTED_TEXT: &str = r#"<p>Hello there, <span style="color: red;">World</span></p>"#;
// HTML fixture with two anchors that each carry both an `href` and a `target`
// attribute; used by the multi-attribute selector test.
const MULTIPLE_ATTRIBUTES: &str = r#"
<a href="https://example.org" target="_blank">Example</a>
<a href="https://example.com" target="_self">Example</a>
"#;
#[test] #[test]
fn test_first_child_is_not_empty() { fn test_first_child_is_not_empty() {
@ -424,4 +487,48 @@ mod tests {
vec![vec!["Hello there, ".to_string(), "World".to_string()]], vec![vec!["Hello there, ".to_string(), "World".to_string()]],
); );
} }
#[test]
fn test_multiple_attributes() {
    // Request both `href` and `target` for every anchor in the fixture.
    let requested = Value::list(
        vec![
            Value::string("href".to_string(), Span::unknown()),
            Value::string("target".to_string(), Span::unknown()),
        ],
        Span::unknown(),
    );
    let result = execute_selector_query_with_attributes(
        MULTIPLE_ATTRIBUTES,
        "a",
        &requested,
        false,
        Span::test_data(),
    );

    // Flatten the list of records into plain (column, value) string pairs so
    // the comparison below does not depend on spans.
    let mut rows: Vec<Vec<(String, String)>> = Vec::new();
    for row in result.into_list().unwrap() {
        let mut pairs: Vec<(String, String)> = Vec::new();
        for (key, value) in row.into_record().unwrap() {
            pairs.push((key, value.coerce_into_string().unwrap()));
        }
        rows.push(pairs);
    }

    let pair = |key: &str, value: &str| (key.to_string(), value.to_string());
    let expected = vec![
        vec![
            pair("href", "https://example.org"),
            pair("target", "_blank"),
        ],
        vec![
            pair("href", "https://example.com"),
            pair("target", "_self"),
        ],
    ];
    assert_eq!(rows, expected)
}
} }