query command with json, web, xml (#870)

* query command with json, web, xml

* query xml now working

* clippy

* comment out web tests

* Initial work on query web

For now we can query everything except tables

* Support for querying tables

Now we can query multiple tables just like before, now the only thing
missing is the test coverage

* finish off

* comment out web test

Co-authored-by: Luccas Mateus de Medeiros Gomes <luccasmmg@gmail.com>
This commit is contained in:
Darren Schroeder 2022-02-01 12:45:48 -06:00 committed by GitHub
parent ebaa584c5e
commit 004d7b5ff0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 2527 additions and 31 deletions

435
Cargo.lock generated
View file

@ -463,7 +463,7 @@ checksum = "58549f1842da3080ce63002102d5bc954c7bc843d4f47818e642abdc36253552"
dependencies = [ dependencies = [
"chrono", "chrono",
"chrono-tz-build", "chrono-tz-build",
"phf", "phf 0.10.0",
] ]
[[package]] [[package]]
@ -473,8 +473,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db058d493fb2f65f41861bfed7e3fe6335264a9f0f92710cab5bdf01fef09069" checksum = "db058d493fb2f65f41861bfed7e3fe6335264a9f0f92710cab5bdf01fef09069"
dependencies = [ dependencies = [
"parse-zoneinfo", "parse-zoneinfo",
"phf", "phf 0.10.0",
"phf_codegen", "phf_codegen 0.10.0",
] ]
[[package]] [[package]]
@ -513,6 +513,12 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
[[package]]
name = "convert_case"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
[[package]] [[package]]
name = "core-foundation" name = "core-foundation"
version = "0.9.2" version = "0.9.2"
@ -632,6 +638,33 @@ dependencies = [
"generic-array 0.14.4", "generic-array 0.14.4",
] ]
[[package]]
name = "cssparser"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa 0.4.8",
"matches",
"phf 0.8.0",
"proc-macro2",
"quote",
"smallvec",
"syn",
]
[[package]]
name = "cssparser-macros"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
dependencies = [
"quote",
"syn",
]
[[package]] [[package]]
name = "cstr_core" name = "cstr_core"
version = "0.2.4" version = "0.2.4"
@ -690,6 +723,19 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35" checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35"
[[package]]
name = "derive_more"
version = "0.99.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [
"convert_case",
"proc-macro2",
"quote",
"rustc_version",
"syn",
]
[[package]] [[package]]
name = "dialoguer" name = "dialoguer"
version = "0.9.0" version = "0.9.0"
@ -798,6 +844,15 @@ version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0"
[[package]]
name = "dtoa-short"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6"
dependencies = [
"dtoa",
]
[[package]] [[package]]
name = "dtparse" name = "dtparse"
version = "1.2.0" version = "1.2.0"
@ -817,6 +872,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541" checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541"
[[package]]
name = "ego-tree"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
[[package]] [[package]]
name = "either" name = "either"
version = "1.6.1" version = "1.6.1"
@ -874,6 +935,7 @@ dependencies = [
"nu_plugin_example", "nu_plugin_example",
"nu_plugin_gstat", "nu_plugin_gstat",
"nu_plugin_inc", "nu_plugin_inc",
"nu_plugin_query",
"pretty_assertions", "pretty_assertions",
"pretty_env_logger", "pretty_env_logger",
"reedline", "reedline",
@ -983,6 +1045,16 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "futf"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]] [[package]]
name = "futures" name = "futures"
version = "0.3.18" version = "0.3.18"
@ -1072,6 +1144,15 @@ dependencies = [
"slab", "slab",
] ]
[[package]]
name = "fxhash"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
dependencies = [
"byteorder",
]
[[package]] [[package]]
name = "generic-array" name = "generic-array"
version = "0.12.4" version = "0.12.4"
@ -1100,6 +1181,15 @@ dependencies = [
"version_check 0.9.3", "version_check 0.9.3",
] ]
[[package]]
name = "getopts"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
dependencies = [
"unicode-width",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.1.16" version = "0.1.16"
@ -1154,6 +1244,12 @@ dependencies = [
"url", "url",
] ]
[[package]]
name = "gjson"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4599d0e9dce476280e2da1f334811e2b26d63a6b000e13b7b50cc980bae49698"
[[package]] [[package]]
name = "glob" name = "glob"
version = "0.3.0" version = "0.3.0"
@ -1251,6 +1347,20 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "html5ever"
version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "htmlescape" name = "htmlescape"
version = "0.3.1" version = "0.3.1"
@ -1676,6 +1786,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]] [[package]]
name = "malloc_buf" name = "malloc_buf"
version = "0.0.6" version = "0.0.6"
@ -1685,6 +1801,20 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "markup5ever"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd"
dependencies = [
"log",
"phf 0.8.0",
"phf_codegen 0.8.0",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]] [[package]]
name = "matches" name = "matches"
version = "0.1.9" version = "0.1.9"
@ -1840,6 +1970,12 @@ dependencies = [
"tempfile", "tempfile",
] ]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]] [[package]]
name = "nix" name = "nix"
version = "0.23.0" version = "0.23.0"
@ -1966,7 +2102,7 @@ dependencies = [
"pathdiff", "pathdiff",
"polars", "polars",
"quick-xml 0.22.0", "quick-xml 0.22.0",
"rand", "rand 0.8.4",
"rayon", "rayon",
"regex", "regex",
"reqwest", "reqwest",
@ -2056,7 +2192,7 @@ version = "0.41.0"
dependencies = [ dependencies = [
"heapless 0.7.9", "heapless 0.7.9",
"nu-ansi-term", "nu-ansi-term",
"rand", "rand 0.8.4",
] ]
[[package]] [[package]]
@ -2137,7 +2273,20 @@ version = "0.1.0"
dependencies = [ dependencies = [
"nu-plugin", "nu-plugin",
"nu-protocol", "nu-protocol",
"semver", "semver 0.11.0",
]
[[package]]
name = "nu_plugin_query"
version = "0.1.0"
dependencies = [
"gjson",
"nu-engine",
"nu-plugin",
"nu-protocol",
"scraper",
"sxd-document",
"sxd-xpath",
] ]
[[package]] [[package]]
@ -2454,6 +2603,12 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]]
name = "peresil"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f658886ed52e196e850cfbbfddab9eaa7f6d90dd0929e264c31e5cec07e09e57"
[[package]] [[package]]
name = "pest" name = "pest"
version = "2.1.3" version = "2.1.3"
@ -2463,13 +2618,34 @@ dependencies = [
"ucd-trie", "ucd-trie",
] ]
[[package]]
name = "phf"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
dependencies = [
"phf_macros",
"phf_shared 0.8.0",
"proc-macro-hack",
]
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.10.0" version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9fc3db1018c4b59d7d582a739436478b6035138b6aecbce989fc91c3e98409f" checksum = "b9fc3db1018c4b59d7d582a739436478b6035138b6aecbce989fc91c3e98409f"
dependencies = [ dependencies = [
"phf_shared", "phf_shared 0.10.0",
]
[[package]]
name = "phf_codegen"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
dependencies = [
"phf_generator 0.8.0",
"phf_shared 0.8.0",
] ]
[[package]] [[package]]
@ -2478,8 +2654,18 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
dependencies = [ dependencies = [
"phf_generator", "phf_generator 0.10.0",
"phf_shared", "phf_shared 0.10.0",
]
[[package]]
name = "phf_generator"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
dependencies = [
"phf_shared 0.8.0",
"rand 0.7.3",
] ]
[[package]] [[package]]
@ -2488,8 +2674,31 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [ dependencies = [
"phf_shared", "phf_shared 0.10.0",
"rand", "rand 0.8.4",
]
[[package]]
name = "phf_macros"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
dependencies = [
"phf_generator 0.8.0",
"phf_shared 0.8.0",
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "phf_shared"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
dependencies = [
"siphasher",
] ]
[[package]] [[package]]
@ -2559,7 +2768,7 @@ dependencies = [
"num_cpus", "num_cpus",
"polars-arrow", "polars-arrow",
"prettytable-rs", "prettytable-rs",
"rand", "rand 0.8.4",
"rand_distr", "rand_distr",
"rayon", "rayon",
"regex", "regex",
@ -2613,6 +2822,12 @@ version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]] [[package]]
name = "predicates" name = "predicates"
version = "2.1.0" version = "2.1.0"
@ -2676,6 +2891,12 @@ dependencies = [
"unicode-width", "unicode-width",
] ]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.33" version = "1.0.33"
@ -2734,6 +2955,20 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "rand"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
dependencies = [
"getrandom 0.1.16",
"libc",
"rand_chacha 0.2.2",
"rand_core 0.5.1",
"rand_hc 0.2.0",
"rand_pcg",
]
[[package]] [[package]]
name = "rand" name = "rand"
version = "0.8.4" version = "0.8.4"
@ -2741,9 +2976,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
dependencies = [ dependencies = [
"libc", "libc",
"rand_chacha", "rand_chacha 0.3.1",
"rand_core 0.6.3", "rand_core 0.6.3",
"rand_hc", "rand_hc 0.3.1",
]
[[package]]
name = "rand_chacha"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
dependencies = [
"ppv-lite86",
"rand_core 0.5.1",
] ]
[[package]] [[package]]
@ -2761,6 +3006,9 @@ name = "rand_core"
version = "0.5.1" version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
dependencies = [
"getrandom 0.1.16",
]
[[package]] [[package]]
name = "rand_core" name = "rand_core"
@ -2778,7 +3026,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "964d548f8e7d12e102ef183a0de7e98180c9f8729f555897a857b96e48122d2f" checksum = "964d548f8e7d12e102ef183a0de7e98180c9f8729f555897a857b96e48122d2f"
dependencies = [ dependencies = [
"num-traits", "num-traits",
"rand", "rand 0.8.4",
]
[[package]]
name = "rand_hc"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
dependencies = [
"rand_core 0.5.1",
] ]
[[package]] [[package]]
@ -2790,6 +3047,15 @@ dependencies = [
"rand_core 0.6.3", "rand_core 0.6.3",
] ]
[[package]]
name = "rand_pcg"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
dependencies = [
"rand_core 0.5.1",
]
[[package]] [[package]]
name = "rand_xoshiro" name = "rand_xoshiro"
version = "0.4.0" version = "0.4.0"
@ -3020,6 +3286,15 @@ version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342"
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver 1.0.4",
]
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.6" version = "1.0.6"
@ -3051,6 +3326,22 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "scraper"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e02aa790c80c2e494130dec6a522033b6a23603ffc06360e9fe6c611ea2c12"
dependencies = [
"cssparser",
"ego-tree",
"getopts",
"html5ever",
"matches",
"selectors",
"smallvec",
"tendril",
]
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "2.4.2" version = "2.4.2"
@ -3074,6 +3365,26 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "selectors"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe"
dependencies = [
"bitflags",
"cssparser",
"derive_more",
"fxhash",
"log",
"matches",
"phf 0.8.0",
"phf_codegen 0.8.0",
"precomputed-hash",
"servo_arc",
"smallvec",
"thin-slice",
]
[[package]] [[package]]
name = "semver" name = "semver"
version = "0.11.0" version = "0.11.0"
@ -3083,6 +3394,12 @@ dependencies = [
"semver-parser", "semver-parser",
] ]
[[package]]
name = "semver"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012"
[[package]] [[package]]
name = "semver-parser" name = "semver-parser"
version = "0.10.2" version = "0.10.2"
@ -3168,6 +3485,16 @@ dependencies = [
"yaml-rust", "yaml-rust",
] ]
[[package]]
name = "servo_arc"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432"
dependencies = [
"nodrop",
"stable_deref_trait",
]
[[package]] [[package]]
name = "sha2" name = "sha2"
version = "0.9.8" version = "0.9.8"
@ -3331,6 +3658,32 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3ff2f71c82567c565ba4b3009a9350a96a7269eaa4001ebedae926230bc2254" checksum = "a3ff2f71c82567c565ba4b3009a9350a96a7269eaa4001ebedae926230bc2254"
[[package]]
name = "string_cache"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "923f0f39b6267d37d23ce71ae7235602134b250ace715dd2c90421998ddac0c6"
dependencies = [
"lazy_static",
"new_debug_unreachable",
"parking_lot",
"phf_shared 0.8.0",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97"
dependencies = [
"phf_generator 0.8.0",
"phf_shared 0.8.0",
"proc-macro2",
"quote",
]
[[package]] [[package]]
name = "strip-ansi-escapes" name = "strip-ansi-escapes"
version = "0.1.1" version = "0.1.1"
@ -3368,6 +3721,27 @@ dependencies = [
"atty", "atty",
] ]
[[package]]
name = "sxd-document"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94d82f37be9faf1b10a82c4bd492b74f698e40082f0f40de38ab275f31d42078"
dependencies = [
"peresil",
"typed-arena",
]
[[package]]
name = "sxd-xpath"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36e39da5d30887b5690e29de4c5ebb8ddff64ebd9933f98a01daaa4fd11b36ea"
dependencies = [
"peresil",
"quick-error",
"sxd-document",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.82" version = "1.0.82"
@ -3415,12 +3789,23 @@ checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
"rand", "rand 0.8.4",
"redox_syscall 0.2.10", "redox_syscall 0.2.10",
"remove_dir_all", "remove_dir_all",
"winapi", "winapi",
] ]
[[package]]
name = "tendril"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9ef557cb397a4f0a5a3a628f06515f78563f2209e64d47055d9dc6052bf5e33"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]] [[package]]
name = "term" name = "term"
version = "0.5.2" version = "0.5.2"
@ -3478,6 +3863,12 @@ dependencies = [
"unicode-width", "unicode-width",
] ]
[[package]]
name = "thin-slice"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.30" version = "1.0.30"
@ -3629,6 +4020,12 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
[[package]]
name = "typed-arena"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9b2228007eba4120145f785df0f6c92ea538f5a3635a612ecf4e334c8c1446d"
[[package]] [[package]]
name = "typenum" name = "typenum"
version = "1.14.0" version = "1.14.0"
@ -3750,6 +4147,12 @@ dependencies = [
"log", "log",
] ]
[[package]]
name = "utf-8"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]] [[package]]
name = "utf8-width" name = "utf8-width"
version = "0.1.5" version = "0.1.5"

View file

@ -18,6 +18,7 @@ members = [
"crates/nu_plugin_inc", "crates/nu_plugin_inc",
"crates/nu_plugin_gstat", "crates/nu_plugin_gstat",
"crates/nu_plugin_example", "crates/nu_plugin_example",
"crates/nu_plugin_query",
] ]
[dependencies] [dependencies]
@ -48,7 +49,7 @@ pretty_env_logger = "0.4.0"
nu_plugin_inc = { version = "0.1.0", path = "./crates/nu_plugin_inc", optional = true } nu_plugin_inc = { version = "0.1.0", path = "./crates/nu_plugin_inc", optional = true }
nu_plugin_example = { version = "0.1.0", path = "./crates/nu_plugin_example", optional = true } nu_plugin_example = { version = "0.1.0", path = "./crates/nu_plugin_example", optional = true }
nu_plugin_gstat = { version = "0.1.0", path = "./crates/nu_plugin_gstat", optional = true } nu_plugin_gstat = { version = "0.1.0", path = "./crates/nu_plugin_gstat", optional = true }
nu_plugin_query = { version = "0.1.0", path = "./crates/nu_plugin_query", optional = true }
[dev-dependencies] [dev-dependencies]
tempfile = "3.2.0" tempfile = "3.2.0"
@ -73,6 +74,7 @@ extra = [
"dataframe", "dataframe",
"gstat", "gstat",
"zip-support", "zip-support",
"query",
] ]
wasi = ["inc"] wasi = ["inc"]
@ -80,13 +82,12 @@ wasi = ["inc"]
# Stable (Default) # Stable (Default)
inc = ["nu_plugin_inc"] inc = ["nu_plugin_inc"]
example = ["nu_plugin_example"] example = ["nu_plugin_example"]
which = ["nu-command/which"] which = ["nu-command/which"]
# Extra # Extra
gstat = ["nu_plugin_gstat"] gstat = ["nu_plugin_gstat"]
zip-support = ["nu-command/zip"] zip-support = ["nu-command/zip"]
query = ["nu_plugin_query"]
# Dataframe feature for nushell # Dataframe feature for nushell
dataframe = ["nu-command/dataframe"] dataframe = ["nu-command/dataframe"]
@ -111,6 +112,11 @@ name = "nu_plugin_gstat"
path = "src/plugins/nu_plugin_extra_gstat.rs" path = "src/plugins/nu_plugin_extra_gstat.rs"
required-features = ["gstat"] required-features = ["gstat"]
[[bin]]
name = "nu_plugin_query"
path = "src/plugins/nu_plugin_extra_query.rs"
required-features = ["query"]
# Main nu binary # Main nu binary
[[bin]] [[bin]]
name = "engine-q" name = "engine-q"

View file

@ -262,7 +262,7 @@ pub fn get_documentation(
long_desc long_desc
} }
fn get_flags_section(signature: &Signature) -> String { pub fn get_flags_section(signature: &Signature) -> String {
let mut long_desc = String::new(); let mut long_desc = String::new();
long_desc.push_str("\nFlags:\n"); long_desc.push_str("\nFlags:\n");
for flag in &signature.named { for flag in &signature.named {

View file

@ -1,6 +1,6 @@
mod call_ext; mod call_ext;
pub mod column; pub mod column;
mod documentation; pub mod documentation;
pub mod env; pub mod env;
mod eval; mod eval;
mod glob_from; mod glob_from;

View file

@ -1,6 +1,6 @@
// @generated by the capnpc-rust plugin to the Cap'n Proto schema compiler. // @generated by the capnpc-rust plugin to the Cap'n Proto schema compiler.
// DO NOT EDIT. // DO NOT EDIT.
// source: plugin.capnp // source: crates/nu-plugin/src/serializers/capnp/schema/plugin.capnp
pub mod err { pub mod err {
/* T */ /* T */
@ -2252,9 +2252,15 @@ pub enum Category {
Filters = 7, Filters = 7,
Formats = 8, Formats = 8,
Math = 9, Math = 9,
Strings = 10, Network = 10,
System = 11, Random = 11,
Viewers = 12, Platform = 12,
Shells = 13,
Strings = 14,
System = 15,
Viewers = 16,
Hash = 17,
Generators = 18,
} }
impl ::capnp::traits::FromU16 for Category { impl ::capnp::traits::FromU16 for Category {
#[inline] #[inline]
@ -2270,9 +2276,15 @@ impl ::capnp::traits::FromU16 for Category {
7 => ::core::result::Result::Ok(Category::Filters), 7 => ::core::result::Result::Ok(Category::Filters),
8 => ::core::result::Result::Ok(Category::Formats), 8 => ::core::result::Result::Ok(Category::Formats),
9 => ::core::result::Result::Ok(Category::Math), 9 => ::core::result::Result::Ok(Category::Math),
10 => ::core::result::Result::Ok(Category::Strings), 10 => ::core::result::Result::Ok(Category::Network),
11 => ::core::result::Result::Ok(Category::System), 11 => ::core::result::Result::Ok(Category::Random),
12 => ::core::result::Result::Ok(Category::Viewers), 12 => ::core::result::Result::Ok(Category::Platform),
13 => ::core::result::Result::Ok(Category::Shells),
14 => ::core::result::Result::Ok(Category::Strings),
15 => ::core::result::Result::Ok(Category::System),
16 => ::core::result::Result::Ok(Category::Viewers),
17 => ::core::result::Result::Ok(Category::Hash),
18 => ::core::result::Result::Ok(Category::Generators),
n => ::core::result::Result::Err(::capnp::NotInSchema(n)), n => ::core::result::Result::Err(::capnp::NotInSchema(n)),
} }
} }

View file

@ -75,9 +75,15 @@ enum Category {
filters @7; filters @7;
formats @8; formats @8;
math @9; math @9;
strings @10; network @10;
system @11; random @11;
viewers @12; platform @12;
shells @13;
strings @14;
system @15;
viewers @16;
hash @17;
generators @18;
} }
struct Flag { struct Flag {

View file

@ -18,9 +18,15 @@ pub(crate) fn serialize_signature(signature: &Signature, mut builder: signature:
Category::Filters => builder.set_category(PluginCategory::Filters), Category::Filters => builder.set_category(PluginCategory::Filters),
Category::Formats => builder.set_category(PluginCategory::Formats), Category::Formats => builder.set_category(PluginCategory::Formats),
Category::Math => builder.set_category(PluginCategory::Math), Category::Math => builder.set_category(PluginCategory::Math),
Category::Network => builder.set_category(PluginCategory::Network),
Category::Random => builder.set_category(PluginCategory::Random),
Category::Platform => builder.set_category(PluginCategory::Platform),
Category::Shells => builder.set_category(PluginCategory::Shells),
Category::Strings => builder.set_category(PluginCategory::Strings), Category::Strings => builder.set_category(PluginCategory::Strings),
Category::System => builder.set_category(PluginCategory::System), Category::System => builder.set_category(PluginCategory::System),
Category::Viewers => builder.set_category(PluginCategory::Viewers), Category::Viewers => builder.set_category(PluginCategory::Viewers),
Category::Hash => builder.set_category(PluginCategory::Hash),
Category::Generators => builder.set_category(PluginCategory::Generators),
_ => builder.set_category(PluginCategory::Default), _ => builder.set_category(PluginCategory::Default),
} }
@ -122,6 +128,12 @@ pub(crate) fn deserialize_signature(reader: signature::Reader) -> Result<Signatu
PluginCategory::Strings => Category::Strings, PluginCategory::Strings => Category::Strings,
PluginCategory::System => Category::System, PluginCategory::System => Category::System,
PluginCategory::Viewers => Category::Viewers, PluginCategory::Viewers => Category::Viewers,
PluginCategory::Network => Category::Network,
PluginCategory::Random => Category::Random,
PluginCategory::Platform => Category::Platform,
PluginCategory::Shells => Category::Shells,
PluginCategory::Hash => Category::Hash,
PluginCategory::Generators => Category::Generators,
}; };
// Deserializing required arguments // Deserializing required arguments

View file

@ -0,0 +1,19 @@
[package]
authors = ["The Nu Project Contributors"]
description = "A set of query commands for Nushell"
edition = "2021"
license = "MIT"
name = "nu_plugin_query"
version = "0.1.0"
[lib]
doctest = false
[dependencies]
nu-plugin = { path="../nu-plugin", version = "0.1.0" }
nu-protocol = { path="../nu-protocol", version = "0.1.0" }
nu-engine = { path="../nu-engine", version = "0.1.0" }
gjson = "0.8.0"
scraper = "0.12.0"
sxd-document = "0.3.2"
sxd-xpath = "0.4.2"

View file

@ -0,0 +1,12 @@
mod nu;
mod query;
mod query_json;
mod query_web;
mod query_xml;
mod web_tables;
pub use query::Query;
pub use query_json::execute_json_query;
pub use query_web::parse_selector_params;
pub use query_xml::execute_xpath_query;
pub use web_tables::WebTable;

View file

@ -0,0 +1,6 @@
use nu_plugin::{serve_plugin, CapnpSerializer};
use nu_plugin_query::Query;
fn main() {
serve_plugin(&mut Query {}, CapnpSerializer {})
}

View file

@ -0,0 +1,70 @@
use crate::Query;
use nu_plugin::{EvaluatedCall, LabeledError, Plugin};
use nu_protocol::{Category, Signature, Spanned, SyntaxShape, Value};
impl Plugin for Query {
fn signature(&self) -> Vec<Signature> {
vec![
Signature::build("query")
.desc("Show all the query commands")
.category(Category::Filters),
Signature::build("query json")
.desc("execute json query on json file (open --raw <file> | query json 'query string')")
.required("query", SyntaxShape::String, "json query")
.category(Category::Filters),
Signature::build("query xml")
.desc("execute xpath query on xml")
.required("query", SyntaxShape::String, "xpath query")
.category(Category::Filters),
Signature::build("query web")
.desc("execute selector query on html/web")
.named("query", SyntaxShape::String, "selector query", Some('q'))
.switch("as_html", "return the query output as html", Some('m'))
.named(
"attribute",
SyntaxShape::String,
"downselect based on the given attribute",
Some('a'),
)
.named(
"as_table",
SyntaxShape::Table,
"find table based on column header list",
Some('t'),
)
.switch(
"inspect",
"run in inspect mode to provide more information for determining column headers",
Some('i'),
)
.category(Category::Network),
]
}
fn run(
&mut self,
name: &str,
call: &EvaluatedCall,
input: &Value,
) -> Result<Value, LabeledError> {
// You can use the name to identify what plugin signature was called
let path: Option<Spanned<String>> = call.opt(0)?;
match name {
"query" => {
self.query(name, call, input, path)
}
"query json" => self.query_json( name, call, input, path),
"query web" => self.query_web(name, call, input, path),
"query xml" => self.query_xml(name, call, input, path),
_ => Err(LabeledError {
label: "Plugin call with wrong name signature".into(),
msg: "the signature used to call the plugin does not match any name in the plugin signature vector".into(),
span: Some(call.head),
}),
}
}
}

View file

@ -0,0 +1,75 @@
use crate::query_json::execute_json_query;
use crate::query_web::parse_selector_params;
use crate::query_xml::execute_xpath_query;
use nu_engine::documentation::get_flags_section;
use nu_plugin::{EvaluatedCall, LabeledError, Plugin};
use nu_protocol::{Signature, Spanned, Value};
#[derive(Default)]
pub struct Query;
impl Query {
pub fn new() -> Self {
Default::default()
}
pub fn usage() -> &'static str {
"Usage: query"
}
pub fn query(
&self,
_name: &str,
call: &EvaluatedCall,
_value: &Value,
_path: Option<Spanned<String>>,
) -> Result<Value, LabeledError> {
let help = get_brief_subcommand_help(&Query.signature());
Ok(Value::string(help, call.head))
}
pub fn query_json(
&self,
name: &str,
call: &EvaluatedCall,
input: &Value,
query: Option<Spanned<String>>,
) -> Result<Value, LabeledError> {
execute_json_query(name, call, input, query)
}
pub fn query_web(
&self,
_name: &str,
call: &EvaluatedCall,
input: &Value,
_rest: Option<Spanned<String>>,
) -> Result<Value, LabeledError> {
parse_selector_params(call, input)
}
pub fn query_xml(
&self,
name: &str,
call: &EvaluatedCall,
input: &Value,
query: Option<Spanned<String>>,
) -> Result<Value, LabeledError> {
execute_xpath_query(name, call, input, query)
}
}
pub fn get_brief_subcommand_help(sigs: &[Signature]) -> String {
let mut help = String::new();
help.push_str(&format!("{}\n\n", sigs[0].usage));
help.push_str(&format!("Usage:\n > {}\n\n", sigs[0].name));
help.push_str("Subcommands:\n");
for x in sigs.iter().enumerate() {
if x.0 == 0 {
continue;
}
help.push_str(&format!(" {} - {}\n", x.1.name, x.1.usage));
}
help.push_str(&get_flags_section(&sigs[0]));
help
}

View file

@ -0,0 +1,151 @@
use gjson::Value as gjValue;
use nu_plugin::{EvaluatedCall, LabeledError};
use nu_protocol::{Span, Spanned, Value};
pub fn execute_json_query(
_name: &str,
call: &EvaluatedCall,
input: &Value,
query: Option<Spanned<String>>,
) -> Result<Value, LabeledError> {
let input_string = match &input.as_string() {
Ok(s) => s.clone(),
Err(e) => {
return Err(LabeledError {
span: Some(call.head),
msg: e.to_string(),
label: "problem with input data".to_string(),
})
}
};
let query_string = match &query {
Some(v) => &v.item,
None => {
return Err(LabeledError {
msg: "problem with input data".to_string(),
label: "problem with input data".to_string(),
span: Some(call.head),
})
}
};
// Validate the json before trying to query it
let is_valid_json = gjson::valid(&input_string);
if !is_valid_json {
return Err(LabeledError {
msg: "invalid json".to_string(),
label: "invalid json".to_string(),
span: Some(call.head),
});
}
let val: gjValue = gjson::get(&input_string, query_string);
if query_contains_modifiers(query_string) {
let json_str = val.json();
Ok(Value::string(json_str, Span::test_data()))
} else {
Ok(convert_gjson_value_to_nu_value(&val, &call.head))
}
}
fn query_contains_modifiers(query: &str) -> bool {
// https://github.com/tidwall/gjson.rs documents 7 modifiers as of 4/19/21
// Some of these modifiers mean we really need to output the data as a string
// instead of tabular data. Others don't matter.
// Output as String
// @ugly: Remove all whitespace from a json document.
// @pretty: Make the json document more human readable.
query.contains("@ugly") || query.contains("@pretty")
// Output as Tablular
// Since it's output as tabular, which is our default, we can just ignore these
// @reverse: Reverse an array or the members of an object.
// @this: Returns the current element. It can be used to retrieve the root element.
// @valid: Ensure the json document is valid.
// @flatten: Flattens an array.
// @join: Joins multiple objects into a single object.
}
fn convert_gjson_value_to_nu_value(v: &gjValue, span: &Span) -> Value {
match v.kind() {
gjson::Kind::Array => {
let mut vals = vec![];
v.each(|_k, v| {
vals.push(convert_gjson_value_to_nu_value(&v, span));
true
});
Value::List { vals, span: *span }
}
gjson::Kind::Null => Value::nothing(*span),
gjson::Kind::False => Value::boolean(false, *span),
gjson::Kind::Number => {
let str_value = v.str();
if str_value.contains('.') {
Value::float(v.f64(), *span)
} else {
Value::int(v.i64(), *span)
}
}
gjson::Kind::String => Value::string(v.str(), *span),
gjson::Kind::True => Value::boolean(true, *span),
gjson::Kind::Object => {
let mut cols = vec![];
let mut vals = vec![];
v.each(|k, v| {
cols.push(k.to_string());
vals.push(convert_gjson_value_to_nu_value(&v, span));
true
});
Value::Record {
cols,
vals,
span: *span,
}
}
}
}
#[cfg(test)]
mod tests {
use gjson::{valid, Value as gjValue};
#[test]
fn validate_string() {
let json = r#"{ "name": { "first": "Tom", "last": "Anderson" }, "age": 37, "children": ["Sara", "Alex", "Jack"], "friends": [ { "first": "James", "last": "Murphy" }, { "first": "Roger", "last": "Craig" } ] }"#;
let val = valid(json);
assert!(val);
}
#[test]
fn answer_from_get_age() {
let json = r#"{ "name": { "first": "Tom", "last": "Anderson" }, "age": 37, "children": ["Sara", "Alex", "Jack"], "friends": [ { "first": "James", "last": "Murphy" }, { "first": "Roger", "last": "Craig" } ] }"#;
let val: gjValue = gjson::get(json, "age");
assert_eq!(val.str(), "37");
}
#[test]
fn answer_from_get_children() {
let json = r#"{ "name": { "first": "Tom", "last": "Anderson" }, "age": 37, "children": ["Sara", "Alex", "Jack"], "friends": [ { "first": "James", "last": "Murphy" }, { "first": "Roger", "last": "Craig" } ] }"#;
let val: gjValue = gjson::get(json, "children");
assert_eq!(val.str(), r#"["Sara", "Alex", "Jack"]"#);
}
#[test]
fn answer_from_get_children_count() {
let json = r#"{ "name": { "first": "Tom", "last": "Anderson" }, "age": 37, "children": ["Sara", "Alex", "Jack"], "friends": [ { "first": "James", "last": "Murphy" }, { "first": "Roger", "last": "Craig" } ] }"#;
let val: gjValue = gjson::get(json, "children.#");
assert_eq!(val.str(), "3");
}
#[test]
fn answer_from_get_friends_first_name() {
let json = r#"{ "name": { "first": "Tom", "last": "Anderson" }, "age": 37, "children": ["Sara", "Alex", "Jack"], "friends": [ { "first": "James", "last": "Murphy" }, { "first": "Roger", "last": "Craig" } ] }"#;
let val: gjValue = gjson::get(json, "friends.#.first");
assert_eq!(val.str(), r#"["James","Roger"]"#);
}
}

View file

@ -0,0 +1,303 @@
use crate::web_tables::WebTable;
use nu_plugin::{EvaluatedCall, LabeledError};
use nu_protocol::{Span, Value};
use scraper::{Html, Selector as ScraperSelector};
pub struct Selector {
pub query: String,
pub as_html: bool,
pub attribute: String,
pub as_table: Value,
pub inspect: bool,
}
impl Selector {
pub fn new() -> Selector {
Selector {
query: String::new(),
as_html: false,
attribute: String::new(),
as_table: Value::string("".to_string(), Span::test_data()),
inspect: false,
}
}
}
impl Default for Selector {
fn default() -> Self {
Self::new()
}
}
pub fn parse_selector_params(call: &EvaluatedCall, input: &Value) -> Result<Value, LabeledError> {
let head = call.head;
let query: String = match call.get_flag("query")? {
Some(q2) => q2,
None => "".to_string(),
};
let as_html = call.has_flag("as_html");
let attribute: String = match call.get_flag("attribute")? {
Some(a) => a,
None => "".to_string(),
};
let as_table: Value = match call.get_flag("as_table")? {
Some(v) => v,
None => Value::nothing(head),
};
let inspect = call.has_flag("inspect");
if !&query.is_empty() && ScraperSelector::parse(&query).is_err() {
return Err(LabeledError {
msg: "Cannot parse this query as a valid css selector".to_string(),
label: "Parse error".to_string(),
span: Some(head),
});
}
let selector = Selector {
query,
as_html,
attribute,
as_table,
inspect,
};
match input {
Value::String { val, span } => Ok(begin_selector_query(val.to_string(), selector, *span)),
_ => Err(LabeledError {
label: "requires text input".to_string(),
msg: "Expected text from pipeline".to_string(),
span: Some(input.span()?),
}),
}
}
fn begin_selector_query(input_html: String, selector: Selector, span: Span) -> Value {
if let Value::List { .. } = selector.as_table {
return retrieve_tables(
input_html.as_str(),
&selector.as_table,
selector.inspect,
span,
);
} else {
match selector.attribute.is_empty() {
true => execute_selector_query(
input_html.as_str(),
selector.query.as_str(),
selector.as_html,
span,
),
false => execute_selector_query_with_attribute(
input_html.as_str(),
selector.query.as_str(),
selector.attribute.as_str(),
span,
),
}
}
}
pub fn retrieve_tables(
input_string: &str,
columns: &Value,
inspect_mode: bool,
span: Span,
) -> Value {
let html = input_string;
let mut cols: Vec<String> = Vec::new();
if let Value::List { vals, .. } = &columns {
for x in vals {
// TODO Find a way to get the Config object here
if let Value::String { val, .. } = x {
cols.push(val.to_string())
}
}
}
if inspect_mode {
eprintln!("Passed in Column Headers = {:#?}", &cols,);
}
let tables = match WebTable::find_by_headers(html, &cols) {
Some(t) => {
if inspect_mode {
eprintln!("Table Found = {:#?}", &t);
}
t
}
None => vec![WebTable::empty()],
};
if tables.len() == 1 {
return retrieve_table(
tables
.into_iter()
.next()
.expect("This should never trigger"),
columns,
span,
);
}
let vals = tables
.into_iter()
.map(move |table| retrieve_table(table, columns, span))
.collect();
Value::List { vals, span }
}
fn retrieve_table(mut table: WebTable, columns: &Value, span: Span) -> Value {
let mut cols: Vec<String> = Vec::new();
if let Value::List { vals, .. } = &columns {
for x in vals {
// TODO Find a way to get the Config object here
if let Value::String { val, .. } = x {
cols.push(val.to_string())
}
}
}
if cols.is_empty() && !table.headers().is_empty() {
for col in table.headers().keys() {
cols.push(col.to_string());
}
}
let mut table_out = Vec::new();
// sometimes there are tables where the first column is the headers, kind of like
// a table has ben rotated ccw 90 degrees, in these cases all columns will be missing
// we keep track of this with this variable so we can deal with it later
let mut at_least_one_row_filled = false;
// if columns are still empty, let's just make a single column table with the data
if cols.is_empty() {
at_least_one_row_filled = true;
let table_with_no_empties: Vec<_> = table.iter().filter(|item| !item.is_empty()).collect();
let mut cols = vec![];
let mut vals = vec![];
for row in &table_with_no_empties {
for (counter, cell) in row.iter().enumerate() {
cols.push(format!("Column{}", counter));
vals.push(Value::string(cell.to_string(), span))
}
}
table_out.push(Value::Record { cols, vals, span })
} else {
for row in &table {
let mut vals = vec![];
let record_cols = &cols;
for col in &cols {
let val = row
.get(col)
.unwrap_or(&format!("Missing column: '{}'", &col))
.to_string();
if !at_least_one_row_filled && val != format!("Missing column: '{}'", &col) {
at_least_one_row_filled = true;
}
vals.push(Value::string(val, span));
}
table_out.push(Value::Record {
cols: record_cols.to_vec(),
vals,
span,
})
}
}
if !at_least_one_row_filled {
let mut data2 = Vec::new();
for x in &table.data {
data2.push(x.join(", "));
}
table.data = vec![data2];
return retrieve_table(table, columns, span);
}
// table_out
Value::List {
vals: table_out,
span,
}
}
fn execute_selector_query_with_attribute(
input_string: &str,
query_string: &str,
attribute: &str,
span: Span,
) -> Value {
let doc = Html::parse_fragment(input_string);
let vals: Vec<Value> = doc
.select(&css(query_string))
.map(|selection| {
Value::string(
selection.value().attr(attribute).unwrap_or("").to_string(),
span,
)
})
.collect();
Value::List { vals, span }
}
fn execute_selector_query(
input_string: &str,
query_string: &str,
as_html: bool,
span: Span,
) -> Value {
let doc = Html::parse_fragment(input_string);
let vals: Vec<Value> = match as_html {
true => doc
.select(&css(query_string))
.map(|selection| Value::string(selection.html(), span))
.collect(),
false => doc
.select(&css(query_string))
.map(|selection| {
Value::string(
selection
.text()
.fold("".to_string(), |acc, x| format!("{}{}", acc, x)),
span,
)
})
.collect(),
};
Value::List { vals, span }
}
pub fn css(selector: &str) -> ScraperSelector {
ScraperSelector::parse(selector).expect("this should never trigger")
}
// #[cfg(test)]
// mod tests {
// use super::*;
// const SIMPLE_LIST: &str = r#"
// <ul>
// <li>Coffee</li>
// <li>Tea</li>
// <li>Milk</li>
// </ul>
// "#;
// #[test]
// fn test_first_child_is_not_empty() {
// assert!(!execute_selector_query(SIMPLE_LIST, "li:first-child", false).is_empty())
// }
// #[test]
// fn test_first_child() {
// assert_eq!(
// vec!["Coffee".to_string()],
// execute_selector_query(SIMPLE_LIST, "li:first-child", false)
// )
// }
// }

View file

@ -0,0 +1,188 @@
use nu_plugin::{EvaluatedCall, LabeledError};
use nu_protocol::{Span, Spanned, Value};
use sxd_document::parser;
use sxd_xpath::{Context, Factory};
pub fn execute_xpath_query(
_name: &str,
call: &EvaluatedCall,
input: &Value,
query: Option<Spanned<String>>,
) -> Result<Value, LabeledError> {
let (query_string, span) = match &query {
Some(v) => (&v.item, &v.span),
None => {
return Err(LabeledError {
msg: "problem with input data".to_string(),
label: "problem with input data".to_string(),
span: Some(call.head),
})
}
};
let xpath = build_xpath(query_string, span)?;
let input_string = input.as_string()?;
let package = parser::parse(&input_string);
if package.is_err() {
return Err(LabeledError {
label: "invalid xml document".to_string(),
msg: "invalid xml document".to_string(),
span: Some(call.head),
});
}
let package = package.expect("invalid xml document");
let document = package.as_document();
let context = Context::new();
// leaving this here for augmentation at some point
// build_variables(&arguments, &mut context);
// build_namespaces(&arguments, &mut context);
let res = xpath.evaluate(&context, document.root());
// Some xpath statements can be long, so let's truncate it with ellipsis
let mut key = query_string.clone();
if query_string.len() >= 20 {
key.truncate(17);
key += "...";
} else {
key = query_string.to_string();
};
match res {
Ok(r) => {
let mut cols: Vec<String> = vec![];
let mut vals: Vec<Value> = vec![];
let mut records: Vec<Value> = vec![];
match r {
sxd_xpath::Value::Nodeset(ns) => {
for n in ns.into_iter() {
cols.push(key.to_string());
vals.push(Value::string(n.string_value(), Span::test_data()));
}
}
sxd_xpath::Value::Boolean(b) => {
cols.push(key.to_string());
vals.push(Value::boolean(b, Span::test_data()));
}
sxd_xpath::Value::Number(n) => {
cols.push(key.to_string());
vals.push(Value::float(n, Span::test_data()));
}
sxd_xpath::Value::String(s) => {
cols.push(key.to_string());
vals.push(Value::string(s, Span::test_data()));
}
};
// convert the cols and vecs to a table by creating individual records
// for each item so we can then use a list to make a table
for (k, v) in cols.iter().zip(vals.iter()) {
records.push(Value::Record {
cols: vec![k.to_string()],
vals: vec![v.clone()],
span: Span::test_data(),
})
}
Ok(Value::List {
vals: records,
span: Span::test_data(),
})
}
Err(_) => Err(LabeledError {
label: "xpath query error".to_string(),
msg: "xpath query error".to_string(),
span: Some(Span::test_data()),
}),
}
}
fn build_xpath(xpath_str: &str, span: &Span) -> Result<sxd_xpath::XPath, LabeledError> {
let factory = Factory::new();
match factory.build(xpath_str) {
Ok(xpath) => xpath.ok_or_else(|| LabeledError {
label: "invalid xpath query".to_string(),
msg: "invalid xpath query".to_string(),
span: Some(*span),
}),
Err(_) => Err(LabeledError {
label: "expected valid xpath query".to_string(),
msg: "expected valid xpath query".to_string(),
span: Some(*span),
}),
}
}
#[cfg(test)]
mod tests {
use super::execute_xpath_query as query;
use nu_plugin::EvaluatedCall;
use nu_protocol::{Span, Spanned, Value};
#[test]
fn position_function_in_predicate() {
let call = EvaluatedCall {
head: Span::test_data(),
positional: vec![],
named: vec![],
};
let text = Value::string(
r#"<?xml version="1.0" encoding="UTF-8"?><a><b/><b/></a>"#,
Span::test_data(),
);
let spanned_str: Spanned<String> = Spanned {
item: "count(//a/*[position() = 2])".to_string(),
span: Span::test_data(),
};
let actual = query("", &call, &text, Some(spanned_str)).expect("test should not fail");
let expected = Value::List {
vals: vec![Value::Record {
cols: vec!["count(//a/*[posit...".to_string()],
vals: vec![Value::float(1.0, Span::test_data())],
span: Span::test_data(),
}],
span: Span::test_data(),
};
assert_eq!(actual, expected);
}
#[test]
fn functions_implicitly_coerce_argument_types() {
let call = EvaluatedCall {
head: Span::test_data(),
positional: vec![],
named: vec![],
};
let text = Value::string(
r#"<?xml version="1.0" encoding="UTF-8"?><a>true</a>"#,
Span::test_data(),
);
let spanned_str: Spanned<String> = Spanned {
item: "count(//*[contains(., true)])".to_string(),
span: Span::test_data(),
};
let actual = query("", &call, &text, Some(spanned_str)).expect("test should not fail");
let expected = Value::List {
vals: vec![Value::Record {
cols: vec!["count(//*[contain...".to_string()],
vals: vec![Value::float(1.0, Span::test_data())],
span: Span::test_data(),
}],
span: Span::test_data(),
};
assert_eq!(actual, expected);
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,6 @@
use nu_plugin::{serve_plugin, CapnpSerializer};
use nu_plugin_query::Query;
fn main() {
serve_plugin(&mut Query::new(), CapnpSerializer {})
}