From a88673b18e56cb056d8dab4f8c7bae60848b8915 Mon Sep 17 00:00:00 2001 From: CosmicHorror Date: Sat, 28 Oct 2023 14:13:54 -0600 Subject: [PATCH] Error on `$` capture replacement names (#258) * Mostly working capture name validation * Improve inputs for property tests * Fix advancing when passing over escaped dollar signs * Switch to inline snapshot captures * Cleanup invalid capture error formatting code --- Cargo.lock | 317 ++++++++++++++++- Cargo.toml | 5 + proptest-regressions/replacer/tests.txt | 8 + proptest-regressions/replacer/validate.txt | 10 + src/error.rs | 4 + src/main.rs | 12 +- src/{replacer.rs => replacer/mod.rs} | 77 +---- src/replacer/tests.rs | 80 +++++ src/replacer/validate.rs | 380 +++++++++++++++++++++ tests/cli.rs | 92 +++++ 10 files changed, 902 insertions(+), 83 deletions(-) create mode 100644 proptest-regressions/replacer/tests.txt create mode 100644 proptest-regressions/replacer/validate.txt rename src/{replacer.rs => replacer/mod.rs} (76%) create mode 100644 src/replacer/tests.rs create mode 100644 src/replacer/validate.rs diff --git a/Cargo.lock b/Cargo.lock index 09911ab..8aeef89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi-to-html" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7bd918cc0ff933f0e6cf48a8f74584818ea43e07d1fba1f9251bb3df2a37ca2" +dependencies = [ + "regex", + "thiserror", +] + [[package]] name = "ansi_term" version = "0.12.1" @@ -55,7 +65,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" dependencies = [ - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -65,7 +75,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -95,6 +105,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -190,6 +215,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "console" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys 0.45.0", +] + [[package]] name = "crossbeam-deque" version = "0.8.3" @@ -241,6 +279,12 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "errno" version = "0.3.5" @@ -248,7 +292,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -263,6 +307,17 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "globset" version = "0.4.13" @@ -316,6 +371,19 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "insta" +version = "1.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d64600be34b2fcfc267740a243fa7744441bb4947a619ac4e5bb6507f35fbfc" +dependencies = [ + "console", + "lazy_static", + "linked-hash-map", + "similar", + "yaml-rust", +] + [[package]] name = "is-terminal" version = "0.4.9" @@ -324,7 +392,7 @@ checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi", "rustix", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -348,6 +416,18 @@ version = "0.2.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.10" @@ -384,12 +464,28 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", + "libm", +] + [[package]] name = "once_cell" version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + [[package]] name = "predicates" version = "3.0.4" @@ -427,6 +523,32 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proptest" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c003ac8c77cb07bb74f5f198bce836a689bcd5a42574612bf14d17bfd08c20e" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags 2.4.1", + "lazy_static", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax 0.7.5", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.33" @@ -436,6 +558,45 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_xorshift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" +dependencies = [ + "rand_core", +] + [[package]] name = "rayon" version = "1.8.0" @@ -474,7 +635,7 @@ dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax", + "regex-syntax 0.8.2", ] [[package]] @@ -485,9 +646,15 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.2", ] +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -510,7 +677,19 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", ] [[package]] @@ -532,17 +711,22 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" name = "sd" version = "0.7.6" dependencies = [ + "ansi-to-html", "ansi_term", "anyhow", "assert_cmd", "clap", "clap_mangen", + "console", "globwalk", "ignore", + "insta", "is-terminal", "memmap2", + "proptest", "rayon", "regex", + "regex-automata", "tempfile", "thiserror", "unescape", @@ -568,6 +752,12 @@ dependencies = [ "syn", ] +[[package]] +name = "similar" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aeaf503862c419d66959f5d7ca015337d864e9c49485d771b732e2a20453597" + [[package]] name = "strsim" version = "0.10.0" @@ -595,7 +785,7 @@ dependencies = [ "fastrand", "redox_syscall", "rustix", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -605,7 +795,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" dependencies = [ "rustix", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -644,6 +834,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unescape" version = "0.1.0" @@ -656,6 +852,12 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + [[package]] name = "utf8parse" version = "0.2.1" @@ -681,6 +883,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + [[package]] name = "winapi" version = "0.3.9" @@ -712,13 +920,37 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] [[package]] @@ -727,51 +959,93 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -787,3 +1061,12 @@ dependencies = [ "clap_mangen", "roff", ] + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] diff --git a/Cargo.toml b/Cargo.toml index 3216193..1d9ab60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,11 @@ clap.workspace = true assert_cmd = "2.0.12" anyhow = "1.0.75" clap_mangen = "0.2.14" +proptest = "1.3.1" +console = "0.15.7" +insta = "1.34.0" +ansi-to-html = "0.1.3" +regex-automata = "0.4.3" [profile.release] opt-level = 3 diff --git a/proptest-regressions/replacer/tests.txt b/proptest-regressions/replacer/tests.txt new file mode 100644 index 0000000..f164b58 --- /dev/null +++ b/proptest-regressions/replacer/tests.txt @@ -0,0 +1,8 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 3a23ade8355ca034558ea8635e4ea2ee96ecb38b7b1cb9a854509d7633d45795 # shrinks to s = "" +cc 8c8d1e7497465f26416bddb7607df0de1fce48d098653eeabac0ad2aeba1fa0a # shrinks to s = "$0$0a" diff --git a/proptest-regressions/replacer/validate.txt b/proptest-regressions/replacer/validate.txt new file mode 100644 index 0000000..bbdb3b3 --- /dev/null +++ b/proptest-regressions/replacer/validate.txt @@ -0,0 +1,10 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc cfacd65058c8dae0ac7b91c56b8096c36ef68cb35d67262debebac005ea9c677 # shrinks to s = "" +cc 61e5dc6ce0314cde48b5cbc839fbf46a49fcf8d0ba02cfeecdcbff52fca8c786 # shrinks to s = "$a" +cc 8e5fd9dbb58ae762a751349749320664715056ef63aad58215397e87ee42c722 # shrinks to s = "$$" +cc 37c2e41ceeddbecbc4e574f82b58a4007923027ad1a6756bf2f547aa3f748d13 # shrinks to s = "$$0" diff --git a/src/error.rs b/src/error.rs index e757cf4..517defd 100644 --- a/src/error.rs +++ b/src/error.rs @@ -3,6 +3,8 @@ use std::{ path::PathBuf, }; +use crate::replacer::InvalidReplaceCapture; + #[derive(thiserror::Error)] pub enum Error { #[error("invalid regex {0}")] @@ -15,6 +17,8 @@ pub enum Error { InvalidPath(PathBuf), #[error("failed processing files:\n{0}")] FailedProcessing(FailedJobs), + #[error("{0}")] + InvalidReplaceCapture(#[from] InvalidReplaceCapture), } pub struct FailedJobs(Vec<(PathBuf, Error)>); diff --git a/src/main.rs b/src/main.rs index de24644..c7097b4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,13 +5,23 @@ mod input; pub(crate) mod replacer; pub(crate) mod utils; +use std::process; + pub(crate) use self::input::{App, Source}; +use ansi_term::{Color, Style}; pub(crate) use error::{Error, Result}; use replacer::Replacer; use clap::Parser; -fn main() -> Result<()> { +fn main() { + if let Err(e) = try_main() { + eprintln!("{}: {}", Style::from(Color::Red).bold().paint("error"), e); + process::exit(1); + } +} + +fn try_main() -> Result<()> { let options = cli::Options::parse(); let source = if options.recursive { diff --git a/src/replacer.rs b/src/replacer/mod.rs similarity index 76% rename from src/replacer.rs rename to src/replacer/mod.rs index f6d5d21..b6e0c0b 100644 --- a/src/replacer.rs +++ b/src/replacer/mod.rs @@ -1,7 +1,15 @@ -use crate::{utils, Error, Result}; -use regex::bytes::Regex; use std::{fs, fs::File, io::prelude::*, path::Path}; +use crate::{utils, Error, Result}; + +use regex::bytes::Regex; + +#[cfg(test)] +mod tests; +mod validate; + +pub use validate::{validate_replace, InvalidReplaceCapture}; + pub(crate) struct Replacer { regex: Regex, replace_with: Vec, @@ -20,6 +28,8 @@ impl Replacer { let (look_for, replace_with) = if is_literal { (regex::escape(&look_for), replace_with.into_bytes()) } else { + validate_replace(&replace_with)?; + ( look_for, utils::unescape(&replace_with) @@ -154,66 +164,3 @@ impl Replacer { Ok(()) } } - -#[cfg(test)] -mod tests { - use super::*; - - fn replace( - look_for: impl Into, - replace_with: impl Into, - literal: bool, - flags: Option<&'static str>, - src: &'static str, - target: &'static str, - ) { - let replacer = Replacer::new( - look_for.into(), - replace_with.into(), - literal, - flags.map(ToOwned::to_owned), - None, - ) - .unwrap(); - assert_eq!( - std::str::from_utf8(&replacer.replace(src.as_bytes())), - Ok(target) - ); - } - - #[test] - fn default_global() { - replace("a", "b", false, None, "aaa", "bbb"); - } - - #[test] - fn escaped_char_preservation() { - replace("a", "b", false, None, "a\\n", "b\\n"); - } - - #[test] - fn case_sensitive_default() { - replace("abc", "x", false, None, "abcABC", "xABC"); - replace("abc", "x", true, None, "abcABC", "xABC"); - } - - #[test] - fn sanity_check_literal_replacements() { - replace("((special[]))", "x", true, None, "((special[]))y", "xy"); - } - - #[test] - fn unescape_regex_replacements() { - replace("test", r"\n", false, None, "testtest", "\n\n"); - } - - #[test] - fn no_unescape_literal_replacements() { - replace("test", r"\n", true, None, "testtest", r"\n\n"); - } - - #[test] - fn full_word_replace() { - replace("abc", "def", false, Some("w"), "abcd abc", "abcd def"); - } -} diff --git a/src/replacer/tests.rs b/src/replacer/tests.rs new file mode 100644 index 0000000..9304a17 --- /dev/null +++ b/src/replacer/tests.rs @@ -0,0 +1,80 @@ +use super::*; + +use proptest::prelude::*; + +proptest! { + #[test] + fn validate_doesnt_panic(s in r"(\PC*\$?){0,5}") { + let _ = validate::validate_replace(&s); + } + + // $ followed by a digit and a non-ident char or an ident char + #[test] + fn validate_ok(s in r"([^\$]*(\$([0-9][^a-zA-Z_0-9\$]|a-zA-Z_))?){0,5}") { + validate::validate_replace(&s).unwrap(); + } + + // Force at least one $ followed by a digit and an ident char + #[test] + fn validate_err(s in r"[^\$]*?\$[0-9][a-zA-Z_]\PC*") { + validate::validate_replace(&s).unwrap_err(); + } +} + +fn replace( + look_for: impl Into, + replace_with: impl Into, + literal: bool, + flags: Option<&'static str>, + src: &'static str, + target: &'static str, +) { + let replacer = Replacer::new( + look_for.into(), + replace_with.into(), + literal, + flags.map(ToOwned::to_owned), + None, + ) + .unwrap(); + assert_eq!( + std::str::from_utf8(&replacer.replace(src.as_bytes())), + Ok(target) + ); +} + +#[test] +fn default_global() { + replace("a", "b", false, None, "aaa", "bbb"); +} + +#[test] +fn escaped_char_preservation() { + replace("a", "b", false, None, "a\\n", "b\\n"); +} + +#[test] +fn case_sensitive_default() { + replace("abc", "x", false, None, "abcABC", "xABC"); + replace("abc", "x", true, None, "abcABC", "xABC"); +} + +#[test] +fn sanity_check_literal_replacements() { + replace("((special[]))", "x", true, None, "((special[]))y", "xy"); +} + +#[test] +fn unescape_regex_replacements() { + replace("test", r"\n", false, None, "testtest", "\n\n"); +} + +#[test] +fn no_unescape_literal_replacements() { + replace("test", r"\n", true, None, "testtest", r"\n\n"); +} + +#[test] +fn full_word_replace() { + replace("abc", "def", false, Some("w"), "abcd abc", "abcd def"); +} diff --git a/src/replacer/validate.rs b/src/replacer/validate.rs new file mode 100644 index 0000000..da5cc71 --- /dev/null +++ b/src/replacer/validate.rs @@ -0,0 +1,380 @@ +use std::{error::Error, fmt, str::CharIndices}; + +use ansi_term::{Color, Style}; + +#[derive(Debug)] +pub struct InvalidReplaceCapture { + original_replace: String, + invalid_ident: Span, + num_leading_digits: usize, +} + +impl Error for InvalidReplaceCapture {} + +// NOTE: This code is much more allocation heavy than it needs to be, but it's +// only displayed as a hard error to the user, so it's not a big deal +impl fmt::Display for InvalidReplaceCapture { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[derive(Clone, Copy)] + enum SpecialChar { + Newline, + CarriageReturn, + Tab, + } + + impl SpecialChar { + fn new(c: char) -> Option { + match c { + '\n' => Some(Self::Newline), + '\r' => Some(Self::CarriageReturn), + '\t' => Some(Self::Tab), + _ => None, + } + } + + /// Renders as the character from the "Control Pictures" block + /// + /// https://en.wikipedia.org/wiki/Control_Pictures + fn render(self) -> char { + match self { + Self::Newline => '␊', + Self::CarriageReturn => '␍', + Self::Tab => '␉', + } + } + } + + let Self { + original_replace, + invalid_ident, + num_leading_digits, + } = self; + + // Build up the error to show the user + let mut formatted = String::new(); + let mut arrows_start = Span::start_at(0); + let special = Style::new().bold(); + let error = Style::from(Color::Red).bold(); + for (byte_index, c) in original_replace.char_indices() { + let (prefix, suffix, text) = match SpecialChar::new(c) { + Some(c) => { + (Some(special.prefix()), Some(special.suffix()), c.render()) + } + None => { + let (prefix, suffix) = if byte_index == invalid_ident.start + { + (Some(error.prefix()), None) + } else if byte_index + == invalid_ident.end.checked_sub(1).unwrap() + { + (None, Some(error.suffix())) + } else { + (None, None) + }; + (prefix, suffix, c) + } + }; + + if let Some(prefix) = prefix { + formatted.push_str(&prefix.to_string()); + } + formatted.push(text); + if let Some(suffix) = suffix { + formatted.push_str(&suffix.to_string()); + } + + if byte_index < invalid_ident.start { + // Assumes that characters have a base display width of 1. While + // that's not technically true, it's near impossible to do right + // since the specifics on text rendering is up to the user's + // terminal/font. This _does_ rely on variable-width characters + // like \n, \r, and \t getting converting to single character + // representations above + arrows_start.start += 1; + } + } + + // This relies on all non-curly-braced capture chars being 1 byte + let arrows_span = arrows_start.end_offset(invalid_ident.len()); + let mut arrows = " ".repeat(arrows_span.start); + arrows.push_str(&format!( + "{}", + Style::new().bold().paint("^".repeat(arrows_span.len())) + )); + + let ident = invalid_ident.slice(original_replace); + let (number, the_rest) = ident.split_at(*num_leading_digits); + let disambiguous = format!("${{{number}}}{the_rest}"); + let error_message = format!( + "The numbered capture group `{}` in the replacement text is ambiguous.", + Style::new().bold().paint(format!("${}", number).to_string()) + ); + let hint_message = format!( + "{}: Use curly braces to disambiguate it `{}`.", + Style::from(Color::Blue).bold().paint("hint"), + Style::new().bold().paint(disambiguous) + ); + + writeln!(f, "{}", error_message)?; + writeln!(f, "{}", hint_message)?; + writeln!(f, "{}", formatted)?; + write!(f, "{}", arrows) + } +} + +pub fn validate_replace(s: &str) -> Result<(), InvalidReplaceCapture> { + for ident in ReplaceCaptureIter::new(s) { + let mut char_it = ident.name.char_indices(); + let (_, c) = char_it.next().unwrap(); + if c.is_ascii_digit() { + for (i, c) in char_it { + if !c.is_ascii_digit() { + return Err(InvalidReplaceCapture { + original_replace: s.to_owned(), + invalid_ident: ident.span, + num_leading_digits: i, + }); + } + } + } + } + + Ok(()) +} + +#[derive(Clone, Copy, Debug)] +struct Span { + start: usize, + end: usize, +} + +impl Span { + fn start_at(start: usize) -> SpanOpen { + SpanOpen { start } + } + + fn new(start: usize, end: usize) -> Self { + // `<` instead of `<=` because `Span` is exclusive on the upper bound + assert!(start < end); + Self { start, end } + } + + fn slice(self, s: &str) -> &str { + &s[self.start..self.end] + } + + fn len(self) -> usize { + self.end - self.start + } +} + +#[derive(Clone, Copy)] +struct SpanOpen { + start: usize, +} + +impl SpanOpen { + fn end_at(self, end: usize) -> Span { + let Self { start } = self; + Span::new(start, end) + } + + fn end_offset(self, offset: usize) -> Span { + assert_ne!(offset, 0); + let Self { start } = self; + self.end_at(start + offset) + } +} + +#[derive(Debug)] +struct Capture<'rep> { + name: &'rep str, + span: Span, +} + +impl<'rep> Capture<'rep> { + fn new(name: &'rep str, span: Span) -> Self { + Self { name, span } + } +} + +/// An iterator over the capture idents in an interpolated replacement string +/// +/// This code is adapted from the `regex` crate +/// +/// (hence the high quality doc comments). +struct ReplaceCaptureIter<'rep>(CharIndices<'rep>); + +impl<'rep> ReplaceCaptureIter<'rep> { + fn new(s: &'rep str) -> Self { + Self(s.char_indices()) + } +} + +impl<'rep> Iterator for ReplaceCaptureIter<'rep> { + type Item = Capture<'rep>; + + fn next(&mut self) -> Option { + // Continually seek to `$` until we find one that has a capture group + loop { + let (start, _) = self.0.find(|(_, c)| *c == '$')?; + + let replacement = self.0.as_str(); + let rep = replacement.as_bytes(); + let open_span = Span::start_at(start + 1); + let maybe_cap = match rep.first()? { + // Handle escaping of '$'. + b'$' => { + self.0.next().unwrap(); + None + } + b'{' => find_cap_ref_braced(rep, open_span), + _ => find_cap_ref(rep, open_span), + }; + + if let Some(cap) = maybe_cap { + // Advance the inner iterator to consume the capture + let mut remaining_bytes = cap.name.len(); + while remaining_bytes > 0 { + let (_, c) = self.0.next().unwrap(); + remaining_bytes = + remaining_bytes.checked_sub(c.len_utf8()).unwrap(); + } + return Some(cap); + } + } + } +} + +/// Parses a possible reference to a capture group name in the given text, +/// starting at the beginning of `replacement`. +/// +/// If no such valid reference could be found, None is returned. +fn find_cap_ref(rep: &[u8], open_span: SpanOpen) -> Option> { + if rep.is_empty() { + return None; + } + + let mut cap_end = 0; + while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) { + cap_end += 1; + } + if cap_end == 0 { + return None; + } + + // We just verified that the range 0..cap_end is valid ASCII, so it must + // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 + // check via an unchecked conversion or by parsing the number straight from + // &[u8]. + let name = core::str::from_utf8(&rep[..cap_end]) + .expect("valid UTF-8 capture name"); + Some(Capture::new(name, open_span.end_offset(name.len()))) +} + +/// Looks for a braced reference, e.g., `${foo1}`. This then looks for a +/// closing brace and returns the capture reference within the brace. +fn find_cap_ref_braced(rep: &[u8], open_span: SpanOpen) -> Option> { + assert_eq!(b'{', rep[0]); + let mut cap_end = 1; + + while rep.get(cap_end).map_or(false, |&b| b != b'}') { + cap_end += 1; + } + if !rep.get(cap_end).map_or(false, |&b| b == b'}') { + return None; + } + + // When looking at braced names, we don't put any restrictions on the name, + // so it's possible it could be invalid UTF-8. But a capture group name + // can never be invalid UTF-8, so if we have invalid UTF-8, then we can + // safely return None. + let name = core::str::from_utf8(&rep[..cap_end + 1]).ok()?; + Some(Capture::new(name, open_span.end_offset(name.len()))) +} + +fn is_valid_cap_letter(b: u8) -> bool { + matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_') +} + +#[cfg(test)] +mod tests { + use super::*; + + use proptest::prelude::*; + + #[test] + fn literal_dollar_sign() { + let replace = "$$0"; + let mut cap_iter = ReplaceCaptureIter::new(replace); + assert!(cap_iter.next().is_none()); + } + + #[test] + fn wacky_captures() { + let replace = + "$foo $1 $1invalid ${1}valid ${valid} $__${__weird__}${${__}"; + + let cap_iter = ReplaceCaptureIter::new(replace); + let expecteds = &[ + "foo", + "1", + "1invalid", + "{1}", + "{valid}", + "__", + "{__weird__}", + "{${__}", + ]; + for (&expected, cap) in expecteds.iter().zip(cap_iter) { + assert_eq!(expected, cap.name, "name didn't match"); + assert_eq!(expected, cap.span.slice(replace), "span didn't match"); + } + } + + const INTERPOLATED_CAPTURE: &str = ""; + + fn upstream_interpolate(s: &str) -> String { + let mut dst = String::new(); + regex_automata::util::interpolate::string( + s, + |_, dst| dst.push_str(INTERPOLATED_CAPTURE), + |_| Some(0), + &mut dst, + ); + dst + } + + fn our_interpolate(s: &str) -> String { + let mut after_last_write = 0; + let mut dst = String::new(); + for cap in ReplaceCaptureIter::new(s) { + // This only iterates over the capture groups, so copy any text + // before the capture + // -1 here to exclude the `$` that starts a capture + dst.push_str( + &s[after_last_write..cap.span.start.checked_sub(1).unwrap()], + ); + // Interpolate our capture + dst.push_str(INTERPOLATED_CAPTURE); + after_last_write = cap.span.end; + } + if after_last_write < s.len() { + // And now any text that was after the last capture + dst.push_str(&s[after_last_write..]); + } + + // Handle escaping literal `$`s + dst.replace("$$", "$") + } + + proptest! { + // `regex-automata` doesn't expose a way to iterate over replacement + // captures, but we can use our iterator to mimic interpolation, so that + // we can pit the two against each other + #[test] + fn interpolation_matches_upstream(s in r"\PC*(\$\PC*){0,5}") { + assert_eq!(our_interpolate(&s), upstream_interpolate(&s)); + } + } +} diff --git a/tests/cli.rs b/tests/cli.rs index 29756b3..d0af23f 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -102,4 +102,96 @@ mod cli { Ok(()) } + + fn bad_replace_helper_styled(replace: &str) -> String { + let err = sd() + .args(["find", replace]) + .write_stdin("stdin") + .unwrap_err(); + String::from_utf8(err.as_output().unwrap().stderr.clone()).unwrap() + } + + fn bad_replace_helper_plain(replace: &str) -> String { + let stderr = bad_replace_helper_styled(replace); + + // TODO: no easy way to toggle off styling yet. Add a `--color ` + // flag, and respect things like `$NO_COLOR`. `ansi_term` is + // unmaintained, so we should migrate off of it anyways + console::AnsiCodeIterator::new(&stderr) + .filter_map(|(s, is_ansi)| (!is_ansi).then_some(s)) + .collect() + } + + #[test] + fn fixed_strings_ambiguous_replace_is_fine() { + sd().args([ + "--fixed-strings", + "foo", + "inner_before $1fine inner_after", + ]) + .write_stdin("outer_before foo outer_after") + .assert() + .success() + .stdout("outer_before inner_before $1fine inner_after outer_after"); + } + + #[test] + fn ambiguous_replace_basic() { + let plain_stderr = bad_replace_helper_plain("before $1bad after"); + insta::assert_snapshot!(plain_stderr, @r###" + error: The numbered capture group `$1` in the replacement text is ambiguous. + hint: Use curly braces to disambiguate it `${1}bad`. + before $1bad after + ^^^^ + "###); + } + + #[test] + fn ambiguous_replace_variable_width() { + let plain_stderr = bad_replace_helper_plain("\r\n\t$1bad\r"); + insta::assert_snapshot!(plain_stderr, @r###" + error: The numbered capture group `$1` in the replacement text is ambiguous. + hint: Use curly braces to disambiguate it `${1}bad`. + ␍␊␉$1bad␍ + ^^^^ + "###); + } + + #[test] + fn ambiguous_replace_multibyte_char() { + let plain_stderr = bad_replace_helper_plain("😈$1bad😇"); + insta::assert_snapshot!(plain_stderr, @r###" + error: The numbered capture group `$1` in the replacement text is ambiguous. + hint: Use curly braces to disambiguate it `${1}bad`. + 😈$1bad😇 + ^^^^ + "###); + } + + #[test] + fn ambiguous_replace_issue_44() { + let plain_stderr = + bad_replace_helper_plain("$1Call $2($5, GetFM20ReturnKey(), $6)"); + insta::assert_snapshot!(plain_stderr, @r###" + error: The numbered capture group `$1` in the replacement text is ambiguous. + hint: Use curly braces to disambiguate it `${1}Call`. + $1Call $2($5, GetFM20ReturnKey(), $6) + ^^^^^ + "###); + } + + // NOTE: styled terminal output is platform dependent, so convert to a + // common format, in this case HTML, to check + #[test] + fn ambiguous_replace_ensure_styling() { + let styled_stderr = bad_replace_helper_styled("\t$1bad after"); + let html_stderr = + ansi_to_html::convert(&styled_stderr, true, true).unwrap(); + insta::assert_snapshot!(html_stderr, @r###" + error: The numbered capture group `$1` in the replacement text is ambiguous. + hint: Use curly braces to disambiguate it `${1}bad`. + $1bad after + ^^^^ + "###); + } }