From f723bc6989d18d0751ff3b71efad28bd07bd91a0 Mon Sep 17 00:00:00 2001
From: Artemiy
Date: Wed, 5 Apr 2023 21:54:08 +0300
Subject: [PATCH] Std xml utils (#8437)

# Description
Add `xaccess`, `xupdate` and `xinsert` scripts to the standard library. They
allow accessing and manipulating data in the new xml format
https://github.com/nushell/nushell/pull/7947 with relative ease.

Access some data in nushell xml structure:

![image](https://user-images.githubusercontent.com/17511668/224785447-317359e2-1430-4dfc-9307-73f1d5e50098.png)

Update attributes of xml tags matching a path:

![image](https://user-images.githubusercontent.com/17511668/224785506-85e9aa30-b36b-43db-af1d-2f4460563124.png)

# User-Facing Changes
New commands `std xaccess`, `std xupdate` and `std xinsert`

# Tests + Formatting
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A clippy::needless_collect` to check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it automatically
> toolkit check pr
> ```

# After Submitting
If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date.
---
 crates/nu-utils/standard_library/std.nu      | 214 +++++++++++++++++++
 crates/nu-utils/standard_library/test_xml.nu |  30 +++
 2 files changed, 244 insertions(+)
 create mode 100644 crates/nu-utils/standard_library/test_xml.nu

diff --git a/crates/nu-utils/standard_library/std.nu b/crates/nu-utils/standard_library/std.nu
index d50753caaa..4508d70963 100644
--- a/crates/nu-utils/standard_library/std.nu
+++ b/crates/nu-utils/standard_library/std.nu
@@ -385,6 +385,220 @@ export def "log debug" [message: string] {
     print --stderr $"(ansi default_dimmed)DBG|(now)|($message)(ansi reset)"
 }
 
+# Utility functions to read, change and create XML data in format supported
+# by `to xml` and `from xml` commands
+
+# Get all xml entries matching simple xpath-inspired query
+export def xaccess [
+    path: list # List of steps. Each step can be a
+               # 1. String with tag name. Finds all children with specified name. Equivalent to `child::A` in xpath
+               # 2. `*` string. Get all children without any filter. Equivalent to `child::*` in xpath
+               # 3. Int. Select n-th among nodes selected by previous path. Equivalent to `(...)[1]` in xpath, but is indexed from 0.
+               # 4. Closure. Predicate accepting entry. Selects all entries among nodes selected by previous path for which predicate returns true.
+] {
+    let input = $in
+    if ($path | is-empty) {
+        let path_span = (metadata $path).span
+        error make {msg: 'Empty path provided'
+                    label: {text: 'Use a non-empty list of path steps'
+                            start: $path_span.start end: $path_span.end}}
+    }
+    # In xpath first element in path is applied to root element
+    # this way it is possible to apply first step to root element
+    # of nu xml without unrolling one step of loop
+    mut values = ()
+    $values = {content: [ { content: $input } ] }
+    for $step in ($path) {
+        match ($step | describe) {
+            'string' => {
+                if $step == '*' {
+                    $values = ($values.content | flatten)
+                } else {
+                    $values = ($values.content | flatten | where tag == $step)
+                }
+            },
+            'int' => {
+                $values = [ ($values | get $step) ]
+            },
+            'closure' => {
+                $values = ($values | where {|x| do $step $x})
+            },
+            $type => {
+                let step_span = (metadata $step).span
+                error make {msg: $'Incorrect path step type ($type)'
+                            label: {text: 'Use a string, int or closure as a step'
+                                    start: $step_span.start end: $step_span.end}}
+            }
+        }
+
+        if ($values | is-empty) {
+            return []
+        }
+    }
+    $values
+}
+
+# Apply a string step (tag name or `*`) to one entry: update its matching children, keep the others in place
+def xupdate-string-step [ step: string rest: list updater: closure ] {
+    let input = $in
+
+    # Get a list of elements to be updated and their indices
+    let to_update = ($input.content | enumerate | filter {|it|
+        let item = $it.item
+        $step == '*' or $item.tag == $step
+    })
+
+    if ($to_update | is-empty) {
+        return $input
+    }
+
+    let new_values = ($to_update.item | xupdate-internal $rest $updater)
+
+    # Pair updated entries back with the indices they had in the original content
+    mut reenumerated_new_values = ($to_update.index | zip $new_values | each {|x| {index: $x.0 item: $x.1}})
+
+    mut new_content = []
+    for it in ($input.content | enumerate) {
+        let item = $it.item
+        let idx = $it.index
+
+        let next = (if (not ($reenumerated_new_values | is-empty)) and $idx == $reenumerated_new_values.0.index {
+            let tmp = $reenumerated_new_values.0
+            $reenumerated_new_values = ($reenumerated_new_values | skip 1)
+            $tmp.item
+        } else {
+            $item
+        })
+
+        $new_content = ($new_content | append $next)
+    }
+
+    {tag: $input.tag attributes: $input.attributes content: $new_content}
+}
+
+# Apply an int step: update only the entry at index `step` of the input list
+def xupdate-int-step [ step: int rest: list updater: closure ] {
+    $in | enumerate | each {|it|
+        let item = $it.item
+        let idx = $it.index
+
+        if $idx == $step {
+            [ $item ] | xupdate-internal $rest $updater | get 0
+        } else {
+            $item
+        }
+    }
+}
+
+# Apply a closure step: update only the entries for which predicate `step` returns true
+def xupdate-closure-step [ step: closure rest: list updater: closure ] {
+    $in | each {|it|
+        if (do $step $it) {
+            [ $it ] | xupdate-internal $rest $updater | get 0
+        } else {
+            $it
+        }
+    }
+}
+
+# Walk `path` step by step over a list of entries and apply `updater` to entries left once the path is exhausted
+def xupdate-internal [ path: list updater: closure ] {
+    let input = $in
+
+    if ($path | is-empty) {
+        $input | each $updater
+    } else {
+        let step = $path.0
+        let rest = ($path | skip 1)
+
+        match ($step | describe) {
+            'string' => {
+                $input | each {|x| $x | xupdate-string-step $step $rest $updater}
+            },
+            'int' => {
+                $input | xupdate-int-step $step $rest $updater
+            },
+            'closure' => {
+                $input | xupdate-closure-step $step $rest $updater
+            },
+            $type => {
+                let step_span = (metadata $step).span
+                error make {msg: $'Incorrect path step type ($type)'
+                            label: {text: 'Use a string, int or closure as a step'
+                                    start: $step_span.start end: $step_span.end}}
+            }
+        }
+    }
+
+}
+
+# Update xml data entries matching simple xpath-inspired query
+export def xupdate [
+    path: list # List of steps. Each step can be a
+               # 1. String with tag name. Finds all children with specified name. Equivalent to `child::A` in xpath
+               # 2. `*` string. Get all children without any filter. Equivalent to `child::*` in xpath
+               # 3. Int. Select n-th among nodes selected by previous path. Equivalent to `(...)[1]` in xpath, but is indexed from 0.
+               # 4. Closure. Predicate accepting entry. Selects all entries among nodes selected by previous path for which predicate returns true.
+    updater: closure # A closure used to transform entries matching path.
+] {
+    # Wrap input into a stand-in parent so the first step applies to the root element, then unwrap the result
+    {tag:? attributes:? content: [$in]} | xupdate-internal $path $updater | get content.0
+}
+
+# Get type of an xml entry
+#
+# Possible types are 'tag', 'text', 'pi' and 'comment'
+export def xtype [] {
+    let input = $in
+    if (($input | describe) == 'string' or
+        ($input.tag? == null and $input.attributes? == null and ($input.content? | describe) == 'string')) {
+        'text'
+    } else if $input.tag? == '!' {
+        'comment'
+    } else if $input.tag? != null and ($input.tag? | str starts-with '?') {
+        'pi'
+    } else if $input.tag? != null {
+        'tag'
+    } else {
+        error make {msg: 'Not an xml entry. Check valid types of xml entries via `help to xml`'}
+    }
+}
+
+# Insert new entry to elements matching simple xpath-inspired query
+export def xinsert [
+    path: list # List of steps. Each step can be a
+               # 1. String with tag name. Finds all children with specified name. Equivalent to `child::A` in xpath
+               # 2. `*` string. Get all children without any filter. Equivalent to `child::*` in xpath
+               # 3. Int. Select n-th among nodes selected by previous path. Equivalent to `(...)[1]` in xpath, but is indexed from 0.
+               # 4. Closure. Predicate accepting entry. Selects all entries among nodes selected by previous path for which predicate returns true.
+    new_entry: record # A new entry to insert into `content` field of record at specified position
+    position?: int  # Position to insert `new_entry` into. If specified inserts entry at given position (or end if
+                    # position is greater than number of elements) in content of all entries of input matched by
+                    # path. If not specified inserts at the end.
+] {
+    $in | xupdate $path {|entry|
+        match ($entry | xtype) {
+            'tag' => {
+                let new_content = if $position == null {
+                    $entry.content | append $new_entry
+                } else {
+                    # Clamp position to the content length so that too large values insert at the end
+                    let position = if $position > ($entry.content | length) {
+                        $entry.content | length
+                    } else {
+                        $position
+                    }
+                    $entry.content | insert $position $new_entry
+                }
+
+
+                {tag: $entry.tag attributes: $entry.attributes content: $new_content}
+            },
+            _ => (error make {msg: 'Can insert entry only into content of a tag node'})
+        }
+    }
+}
+
 # print a command name as dimmed and italic
 def pretty-command [] {
     let command = $in
diff --git a/crates/nu-utils/standard_library/test_xml.nu b/crates/nu-utils/standard_library/test_xml.nu
new file mode 100644
index 0000000000..d8418d8296
--- /dev/null
+++ b/crates/nu-utils/standard_library/test_xml.nu
@@ -0,0 +1,30 @@
+use std.nu "xaccess"
+use std.nu "xupdate"
+use std.nu "xinsert"
+use std.nu "assert equal"
+
+export def test_xml_xaccess [] {
+    let sample_xml = ('<a><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d></a>' | from xml)
+
+    assert equal ($sample_xml | xaccess [a]) [$sample_xml]
+    assert equal ($sample_xml | xaccess [*]) [$sample_xml]
+    assert equal ($sample_xml | xaccess [* d e]) [[tag, attributes, content]; [e, {}, [[tag, attributes, content]; [null, null, z]]], [e, {}, [[tag, attributes, content]; [null, null, x]]]]
+    assert equal ($sample_xml | xaccess [* d e 1]) [[tag, attributes, content]; [e, {}, [[tag, attributes, content]; [null, null, x]]]]
+    assert equal ($sample_xml | xaccess [* * * {|e| $e.attributes != {}}]) [[tag, attributes, content]; [c, {a: b}, []]]
+}
+
+export def test_xml_xupdate [] {
+    let sample_xml = ('<a><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d></a>' | from xml)
+
+    assert equal ($sample_xml | xupdate [*] {|x| $x | update attributes {i: j}}) ('<a i="j"><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d></a>' | from xml)
+    assert equal ($sample_xml | xupdate [* d e *] {|x| $x | update content 'nushell'}) ('<a><b><c a="b"></c></b><c></c><d><e>nushell</e><e>nushell</e></d></a>' | from xml)
+    assert equal ($sample_xml | xupdate [* * * {|e| $e.attributes != {}}] {|x| $x | update content ['xml']}) {tag: a, attributes: {}, content: [[tag, attributes, content]; [b, {}, [[tag, attributes, content]; [c, {a: b}, [xml]]]], [c, {}, []], [d, {}, [[tag, attributes, content]; [e, {}, [[tag, attributes, content]; [null, null, z]]], [e, {}, [[tag, attributes, content]; [null, null, x]]]]]]}
+}
+
+export def test_xml_xinsert [] {
+    let sample_xml = ('<a><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d></a>' | from xml)
+
+    assert equal ($sample_xml | xinsert [a] {tag: b attributes:{} content: []}) ('<a><b><c a="b"></c></b><c></c><d><e>z</e><e>x</e></d><b></b></a>' | from xml)
+    assert equal ($sample_xml | xinsert [a d *] {tag: null attributes: null content: 'n'} | to xml) '<a><b><c a="b"></c></b><c></c><d><e>zn</e><e>xn</e></d></a>'
+    assert equal ($sample_xml | xinsert [a *] {tag: null attributes: null content: 'n'}) ('<a><b><c a="b"></c>n</b><c>n</c><d><e>z</e><e>x</e>n</d></a>' | from xml)
+}
\ No newline at end of file