Std xml utils (#8437)

# Description

Add `xaccess`, `xupdate` and `xinsert` scripts to the standard library. They
allow accessing and manipulating data in the new xml format (introduced in
https://github.com/nushell/nushell/pull/7947) with relative ease.

Access some data in nushell xml structure:

![image](https://user-images.githubusercontent.com/17511668/224785447-317359e2-1430-4dfc-9307-73f1d5e50098.png)

Update attributes of xml tags matching a path:

![image](https://user-images.githubusercontent.com/17511668/224785506-85e9aa30-b36b-43db-af1d-2f4460563124.png)


# User-Facing Changes

New commands `std xaccess`, `std xupdate` and `std xinsert`

# Tests + Formatting

Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect` to check that you're using the standard code
style
- `cargo test --workspace` to check that all tests pass

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```

# After Submitting

If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
This commit is contained in:
Artemiy 2023-04-05 21:54:08 +03:00 committed by GitHub
parent 22142bd4ae
commit f723bc6989
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 237 additions and 0 deletions

View file

@ -385,6 +385,213 @@ export def "log debug" [message: string] {
print --stderr $"(ansi default_dimmed)DBG|(now)|($message)(ansi reset)" print --stderr $"(ansi default_dimmed)DBG|(now)|($message)(ansi reset)"
} }
# Utility functions to read, change and create XML data in format supported
# by `to xml` and `from xml` commands
# Get all xml entries matching simple xpath-inspired query
#
# Returns a list of the matched entries. An empty list is returned as soon as
# a step matches nothing.
export def xaccess [
    path: list # List of steps. Each step can be a
               # 1. String with tag name. Finds all children with specified name. Equivalent to `child::A` in xpath
               # 2. `*` string. Get all children without any filter. Equivalent to `child::node()` in xpath
               # 3. Int. Select n-th among nodes selected by previous path. Equivalent to `(...)[1]` in xpath, but is indexed from 0.
               # 4. Closure. Predicate accepting entry. Selects all entries among nodes selected by previous path for which predicate returns true.
] {
    let input = $in
    if ($path | is-empty) {
        let path_span = (metadata $path).span
        error make {msg: 'Empty path provided'
                    label: {text: 'Use a non-empty list of path steps'
                            start: $path_span.start end: $path_span.end}}
    }
    # In xpath the first step of a path is applied to the root element.
    # Wrapping the input into a fake parent lets the loop below apply the
    # first step to the root of nu xml without unrolling one iteration.
    # NOTE(review): `values` is presumably initialised with `()` so that it
    # can hold both the wrapper record and the lists assigned later - confirm.
    mut values = ()
    $values = {content: [ { content: $input } ] }
    for $step in ($path) {
        match ($step | describe) {
            'string' => {
                # String steps descend one level: collect the children of
                # every currently selected node, optionally filtered by tag.
                if $step == '*' {
                    $values = ($values.content | flatten)
                } else {
                    $values = ($values.content | flatten | where tag == $step)
                }
            },
            'int' => {
                # Keep only the n-th of the currently selected nodes.
                $values = [ ($values | get $step) ]
            },
            'closure' => {
                # Keep the selected nodes for which the predicate holds.
                $values = ($values | where {|x| do $step $x})
            },
            $type => {
                let step_span = (metadata $step).span
                error make {msg: $'Incorrect path step type ($type)'
                            label: {text: 'Use a string, int or closure as a step'
                                    start: $step_span.start end: $step_span.end}}
            }
        }
        # Nothing left to select from: no later step can match anything.
        if ($values | is-empty) {
            return []
        }
    }
    $values
}
# Apply a string path step to a single entry taken from the pipeline.
#
# Children of the entry whose tag equals `step` (or all children when `step`
# is `*`) are passed on to `xupdate-internal` with the remaining path, and the
# entry is rebuilt with the results put back at their original positions.
def xupdate-string-step [ step: string rest: list updater: closure ] {
    let node = $in

    # Children selected by this step, together with their indices
    let matched = ($node.content | enumerate | filter {|pair|
        $step == '*' or $pair.item.tag == $step
    })
    if ($matched | is-empty) {
        return $node
    }

    let updated = ($matched.item | xupdate-internal $rest $updater)
    # Queue of {index, item} replacements, in ascending index order
    mut pending = ($matched.index | zip $updated | each {|pair| {index: $pair.0 item: $pair.1}})

    # Walk the original children, substituting the head of the queue
    # whenever its index is reached.
    mut rebuilt = []
    for pair in ($node.content | enumerate) {
        let is_hit = ((not ($pending | is-empty)) and $pair.index == $pending.0.index)
        if $is_hit {
            $rebuilt = ($rebuilt | append $pending.0.item)
            $pending = ($pending | skip 1)
        } else {
            $rebuilt = ($rebuilt | append $pair.item)
        }
    }

    {tag: $node.tag attributes: $node.attributes content: $rebuilt}
}
# Apply an int path step to the entries taken from the pipeline.
#
# Only the entry at position `step` is passed on to `xupdate-internal` with
# the remaining path; every other entry is returned as is.
def xupdate-int-step [ step: int rest: list updater: closure ] {
    $in | enumerate | each {|entry|
        if $entry.index != $step {
            $entry.item
        } else {
            # `xupdate-internal` is fed a one-element list, so unwrap its result
            [ $entry.item ] | xupdate-internal $rest $updater | get 0
        }
    }
}
# Apply a closure (predicate) path step to the entries taken from the pipeline.
#
# Entries for which `step` returns true are passed on to `xupdate-internal`
# with the remaining path; the others are returned as is.
def xupdate-closure-step [ step: closure rest: list updater: closure ] {
    $in | each {|node|
        if not (do $step $node) {
            $node
        } else {
            # `xupdate-internal` is fed a one-element list, so unwrap its result
            [ $node ] | xupdate-internal $rest $updater | get 0
        }
    }
}
# Recursive worker behind `xupdate`.
#
# With an empty `path` every input entry is transformed by `updater`.
# Otherwise the first step of `path` selects which helper handles the input,
# and the rest of the path is handed down to that helper.
def xupdate-internal [ path: list updater: closure ] {
    let input = $in

    if ($path | is-empty) {
        # End of the path: these are the entries to transform
        $input | each $updater
    } else {
        let step = $path.0
        let rest = ($path | skip 1)

        match ($step | describe) {
            'string' => {
                # String steps work on the children of each entry separately
                $input | each {|x| $x | xupdate-string-step $step $rest $updater}
            },
            'int' => {
                $input | xupdate-int-step $step $rest $updater
            },
            'closure' => {
                $input | xupdate-closure-step $step $rest $updater
            },
            $type => {
                let step_span = (metadata $step).span
                error make {msg: $'Incorrect path step type ($type)'
                            label: {text: 'Use a string, int or closure as a step'
                                    start: $step_span.start end: $step_span.end}}
            }
        }
    }
}
# Update xml data entries matching simple xpath-inspired query
#
# Returns the input with every entry matched by `path` replaced by the
# result of `updater`.
export def xupdate [
    path: list # List of steps. Each step can be a
               # 1. String with tag name. Selects all children with the given name. Equivalent to `child::A` in xpath
               # 2. `*` string. Selects all children without any filter. Equivalent to `child::node()` in xpath
               # 3. Int. Selects the n-th among nodes selected by previous path. Equivalent to `(...)[1]` in xpath, but is indexed from 0.
               # 4. Closure. Predicate accepting entry. Selects all entries among nodes selected by previous path for which predicate returns true.
    updater: closure # A closure used to transform entries matching path.
] {
    let root = $in
    # Wrap the root into a placeholder parent so that the first step of the
    # path is matched against the root itself, then unwrap the result.
    let wrapper = {tag:? attributes:? content: [$root]}
    $wrapper | xupdate-internal $path $updater | get content.0
}
# Get type of an xml entry
#
# Possible types are 'tag', 'text', 'pi' and 'comment'.
# Raises an error when the input matches none of them.
export def xtype [] {
    let input = $in
    # Text is either a plain string or a record without tag and attributes
    # whose content is a string.
    if (($input | describe) == 'string' or
        ($input.tag? == null and $input.attributes? == null and ($input.content? | describe) == 'string')) {
        'text'
    } else if $input.tag? == '!' {
        'comment'
    } else if $input.tag? != null and ($input.tag? | str starts-with '?') {
        'pi'
    } else if $input.tag? != null {
        'tag'
    } else {
        error make {msg: 'Not an xml entry. Check valid types of xml entries via `help to xml`'}
    }
}
# Insert new entry to elements matching simple xpath-inspired query
#
# Every entry matched by `path` must be a tag node; `new_entry` is added to
# its `content`. Matching any other kind of entry raises an error.
export def xinsert [
    path: list # List of steps. Each step can be a
               # 1. String with tag name. Selects all children with the given name. Equivalent to `child::A` in xpath
               # 2. `*` string. Selects all children without any filter. Equivalent to `child::node()` in xpath
               # 3. Int. Selects the n-th among nodes selected by previous path. Equivalent to `(...)[1]` in xpath, but is indexed from 0.
               # 4. Closure. Predicate accepting entry. Selects all entries among nodes selected by previous path for which predicate returns true.
    new_entry: record # A new entry to insert into `content` field of record at specified position
    position?: int # Position to insert `new_entry` into. If specified, the entry is inserted at that position
                   # (or at the end if position is greater than the number of elements) in the content of
                   # every entry matched by path. If not specified, the entry is inserted at the end.
] {
    $in | xupdate $path {|entry|
        if ($entry | xtype) != 'tag' {
            error make {msg: 'Can insert entry only into content of a tag node'}
        }

        let children = $entry.content
        let new_content = if $position != null {
            # Clamp the requested position to the end of the content list
            let count = ($children | length)
            let idx = if $position <= $count { $position } else { $count }
            $children | insert $idx $new_entry
        } else {
            $children | append $new_entry
        }

        {tag: $entry.tag attributes: $entry.attributes content: $new_content}
    }
}
# print a command name as dimmed and italic # print a command name as dimmed and italic
def pretty-command [] { def pretty-command [] {
let command = $in let command = $in

View file

@ -0,0 +1,30 @@
use std.nu "xaccess"
use std.nu "xupdate"
use std.nu "xinsert"
use std.nu "assert equal"
# Checks `xaccess` with tag-name, `*`, int and closure steps on a small document
export def test_xml_xaccess [] {
    let sample_xml = ('<a><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d></a>' | from xml)

    # The first step is matched against the root element itself
    let by_name = ($sample_xml | xaccess [a])
    assert equal $by_name [$sample_xml]
    let by_star = ($sample_xml | xaccess [*])
    assert equal $by_star [$sample_xml]

    # Tag-name steps descend through children
    let both_e = [[tag, attributes, content]; [e, {}, [[tag, attributes, content]; [null, null, z]]], [e, {}, [[tag, attributes, content]; [null, null, x]]]]
    assert equal ($sample_xml | xaccess [* d e]) $both_e

    # Int step picks one of the previously selected nodes (0-indexed)
    let second_e = [[tag, attributes, content]; [e, {}, [[tag, attributes, content]; [null, null, x]]]]
    assert equal ($sample_xml | xaccess [* d e 1]) $second_e

    # Closure step filters the previously selected nodes
    let with_attrs = [[tag, attributes, content]; [c, {a: b}, []]]
    assert equal ($sample_xml | xaccess [* * * {|e| $e.attributes != {}}]) $with_attrs
}
# Checks `xupdate` on the root, on nested children and with a closure step
export def test_xml_xupdate [] {
    let sample_xml = ('<a><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d></a>' | from xml)

    # Updating the root element
    let root_updated = ($sample_xml | xupdate [*] {|x| $x | update attributes {i: j}})
    assert equal $root_updated ('<a i="j"><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d></a>' | from xml)

    # Updating the text children of every `e`
    let text_updated = ($sample_xml | xupdate [* d e *] {|x| $x | update content 'nushell'})
    assert equal $text_updated ('<a><b><c a="b"></c></b><c></c><d><e>nushell</e><e>nushell</e></d></a>' | from xml)

    # Updating only the entries selected by a predicate
    let filtered_updated = ($sample_xml | xupdate [* * * {|e| $e.attributes != {}}] {|x| $x | update content ['xml']})
    let expected = {tag: a, attributes: {}, content: [[tag, attributes, content]; [b, {}, [[tag, attributes, content]; [c, {a: b}, [xml]]]], [c, {}, []], [d, {}, [[tag, attributes, content]; [e, {}, [[tag, attributes, content]; [null, null, z]]], [e, {}, [[tag, attributes, content]; [null, null, x]]]]]]}
    assert equal $filtered_updated $expected
}
# Checks `xinsert` appending a tag entry and text entries at several depths
export def test_xml_xinsert [] {
    let sample_xml = ('<a><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d></a>' | from xml)
    let text_entry = {tag: null attributes: null content: 'n'}

    # Appending an empty `b` tag to the root
    let with_tag = ($sample_xml | xinsert [a] {tag: b attributes:{} content: []})
    assert equal $with_tag ('<a><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d><b></b></a>' | from xml)

    # Appending text to every child of `d`; compared in serialized form
    let deep_text = ($sample_xml | xinsert [a d *] $text_entry | to xml)
    assert equal $deep_text '<a><b><c a="b"></c></b><c></c><d><e>zn</e><e>xn</e></d></a>'

    # Appending text to every child of the root
    let shallow_text = ($sample_xml | xinsert [a *] $text_entry)
    assert equal $shallow_text ('<a><b><c a="b"></c>n</b><c>n</c><d><e>z</e><e>x</e>n</d></a>' | from xml)
}