Error on $<num><non_num> capture replacement names (#258)

* Mostly working capture name validation

* Improve inputs for property tests

* Fix advancing when passing over escaped dollar signs

* Switch to inline snapshot captures

* Cleanup invalid capture error formatting code
This commit is contained in:
CosmicHorror 2023-10-28 14:13:54 -06:00 committed by GitHub
parent a98100fb53
commit a88673b18e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 902 additions and 83 deletions

317
Cargo.lock generated
View file

@ -11,6 +11,16 @@ dependencies = [
"memchr",
]
[[package]]
name = "ansi-to-html"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7bd918cc0ff933f0e6cf48a8f74584818ea43e07d1fba1f9251bb3df2a37ca2"
dependencies = [
"regex",
"thiserror",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
@ -55,7 +65,7 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
dependencies = [
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -65,7 +75,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628"
dependencies = [
"anstyle",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -95,6 +105,21 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -190,6 +215,19 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "console"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys 0.45.0",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.3"
@ -241,6 +279,12 @@ version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "errno"
version = "0.3.5"
@ -248,7 +292,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860"
dependencies = [
"libc",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -263,6 +307,17 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "getrandom"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "globset"
version = "0.4.13"
@ -316,6 +371,19 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "insta"
version = "1.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d64600be34b2fcfc267740a243fa7744441bb4947a619ac4e5bb6507f35fbfc"
dependencies = [
"console",
"lazy_static",
"linked-hash-map",
"similar",
"yaml-rust",
]
[[package]]
name = "is-terminal"
version = "0.4.9"
@ -324,7 +392,7 @@ checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
dependencies = [
"hermit-abi",
"rustix",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -348,6 +416,18 @@ version = "0.2.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
[[package]]
name = "libm"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "linked-hash-map"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "linux-raw-sys"
version = "0.4.10"
@ -384,12 +464,28 @@ dependencies = [
"autocfg",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
"libm",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "predicates"
version = "3.0.4"
@ -427,6 +523,32 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "proptest"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c003ac8c77cb07bb74f5f198bce836a689bcd5a42574612bf14d17bfd08c20e"
dependencies = [
"bit-set",
"bit-vec",
"bitflags 2.4.1",
"lazy_static",
"num-traits",
"rand",
"rand_chacha",
"rand_xorshift",
"regex-syntax 0.7.5",
"rusty-fork",
"tempfile",
"unarray",
]
[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quote"
version = "1.0.33"
@ -436,6 +558,45 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_xorshift"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f"
dependencies = [
"rand_core",
]
[[package]]
name = "rayon"
version = "1.8.0"
@ -474,7 +635,7 @@ dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
"regex-syntax 0.8.2",
]
[[package]]
@ -485,9 +646,15 @@ checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"regex-syntax 0.8.2",
]
[[package]]
name = "regex-syntax"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]]
name = "regex-syntax"
version = "0.8.2"
@ -510,7 +677,19 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
name = "rusty-fork"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f"
dependencies = [
"fnv",
"quick-error",
"tempfile",
"wait-timeout",
]
[[package]]
@ -532,17 +711,22 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
name = "sd"
version = "0.7.6"
dependencies = [
"ansi-to-html",
"ansi_term",
"anyhow",
"assert_cmd",
"clap",
"clap_mangen",
"console",
"globwalk",
"ignore",
"insta",
"is-terminal",
"memmap2",
"proptest",
"rayon",
"regex",
"regex-automata",
"tempfile",
"thiserror",
"unescape",
@ -568,6 +752,12 @@ dependencies = [
"syn",
]
[[package]]
name = "similar"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2aeaf503862c419d66959f5d7ca015337d864e9c49485d771b732e2a20453597"
[[package]]
name = "strsim"
version = "0.10.0"
@ -595,7 +785,7 @@ dependencies = [
"fastrand",
"redox_syscall",
"rustix",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -605,7 +795,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7"
dependencies = [
"rustix",
"windows-sys",
"windows-sys 0.48.0",
]
[[package]]
@ -644,6 +834,12 @@ dependencies = [
"once_cell",
]
[[package]]
name = "unarray"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
[[package]]
name = "unescape"
version = "0.1.0"
@ -656,6 +852,12 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-width"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
[[package]]
name = "utf8parse"
version = "0.2.1"
@ -681,6 +883,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.3.9"
@ -712,13 +920,37 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets 0.42.2",
]
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
"windows-targets 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm 0.42.2",
"windows_aarch64_msvc 0.42.2",
"windows_i686_gnu 0.42.2",
"windows_i686_msvc 0.42.2",
"windows_x86_64_gnu 0.42.2",
"windows_x86_64_gnullvm 0.42.2",
"windows_x86_64_msvc 0.42.2",
]
[[package]]
@ -727,51 +959,93 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
@ -787,3 +1061,12 @@ dependencies = [
"clap_mangen",
"roff",
]
[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
]

View file

@ -43,6 +43,11 @@ clap.workspace = true
assert_cmd = "2.0.12"
anyhow = "1.0.75"
clap_mangen = "0.2.14"
proptest = "1.3.1"
console = "0.15.7"
insta = "1.34.0"
ansi-to-html = "0.1.3"
regex-automata = "0.4.3"
[profile.release]
opt-level = 3

View file

@ -0,0 +1,8 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 3a23ade8355ca034558ea8635e4ea2ee96ecb38b7b1cb9a854509d7633d45795 # shrinks to s = ""
cc 8c8d1e7497465f26416bddb7607df0de1fce48d098653eeabac0ad2aeba1fa0a # shrinks to s = "$0$0a"

View file

@ -0,0 +1,10 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc cfacd65058c8dae0ac7b91c56b8096c36ef68cb35d67262debebac005ea9c677 # shrinks to s = ""
cc 61e5dc6ce0314cde48b5cbc839fbf46a49fcf8d0ba02cfeecdcbff52fca8c786 # shrinks to s = "$a"
cc 8e5fd9dbb58ae762a751349749320664715056ef63aad58215397e87ee42c722 # shrinks to s = "$$"
cc 37c2e41ceeddbecbc4e574f82b58a4007923027ad1a6756bf2f547aa3f748d13 # shrinks to s = "$$0"

View file

@ -3,6 +3,8 @@ use std::{
path::PathBuf,
};
use crate::replacer::InvalidReplaceCapture;
#[derive(thiserror::Error)]
pub enum Error {
#[error("invalid regex {0}")]
@ -15,6 +17,8 @@ pub enum Error {
InvalidPath(PathBuf),
#[error("failed processing files:\n{0}")]
FailedProcessing(FailedJobs),
#[error("{0}")]
InvalidReplaceCapture(#[from] InvalidReplaceCapture),
}
pub struct FailedJobs(Vec<(PathBuf, Error)>);

View file

@ -5,13 +5,23 @@ mod input;
pub(crate) mod replacer;
pub(crate) mod utils;
use std::process;
pub(crate) use self::input::{App, Source};
use ansi_term::{Color, Style};
pub(crate) use error::{Error, Result};
use replacer::Replacer;
use clap::Parser;
fn main() -> Result<()> {
fn main() {
if let Err(e) = try_main() {
eprintln!("{}: {}", Style::from(Color::Red).bold().paint("error"), e);
process::exit(1);
}
}
fn try_main() -> Result<()> {
let options = cli::Options::parse();
let source = if options.recursive {

View file

@ -1,7 +1,15 @@
use crate::{utils, Error, Result};
use regex::bytes::Regex;
use std::{fs, fs::File, io::prelude::*, path::Path};
use crate::{utils, Error, Result};
use regex::bytes::Regex;
#[cfg(test)]
mod tests;
mod validate;
pub use validate::{validate_replace, InvalidReplaceCapture};
pub(crate) struct Replacer {
regex: Regex,
replace_with: Vec<u8>,
@ -20,6 +28,8 @@ impl Replacer {
let (look_for, replace_with) = if is_literal {
(regex::escape(&look_for), replace_with.into_bytes())
} else {
validate_replace(&replace_with)?;
(
look_for,
utils::unescape(&replace_with)
@ -154,66 +164,3 @@ impl Replacer {
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
fn replace(
look_for: impl Into<String>,
replace_with: impl Into<String>,
literal: bool,
flags: Option<&'static str>,
src: &'static str,
target: &'static str,
) {
let replacer = Replacer::new(
look_for.into(),
replace_with.into(),
literal,
flags.map(ToOwned::to_owned),
None,
)
.unwrap();
assert_eq!(
std::str::from_utf8(&replacer.replace(src.as_bytes())),
Ok(target)
);
}
#[test]
fn default_global() {
replace("a", "b", false, None, "aaa", "bbb");
}
#[test]
fn escaped_char_preservation() {
replace("a", "b", false, None, "a\\n", "b\\n");
}
#[test]
fn case_sensitive_default() {
replace("abc", "x", false, None, "abcABC", "xABC");
replace("abc", "x", true, None, "abcABC", "xABC");
}
#[test]
fn sanity_check_literal_replacements() {
replace("((special[]))", "x", true, None, "((special[]))y", "xy");
}
#[test]
fn unescape_regex_replacements() {
replace("test", r"\n", false, None, "testtest", "\n\n");
}
#[test]
fn no_unescape_literal_replacements() {
replace("test", r"\n", true, None, "testtest", r"\n\n");
}
#[test]
fn full_word_replace() {
replace("abc", "def", false, Some("w"), "abcd abc", "abcd def");
}
}

80
src/replacer/tests.rs Normal file
View file

@ -0,0 +1,80 @@
use super::*;
use proptest::prelude::*;
proptest! {
#[test]
fn validate_doesnt_panic(s in r"(\PC*\$?){0,5}") {
let _ = validate::validate_replace(&s);
}
// $ followed by a digit and a non-ident char or an ident char
#[test]
fn validate_ok(s in r"([^\$]*(\$([0-9][^a-zA-Z_0-9\$]|a-zA-Z_))?){0,5}") {
validate::validate_replace(&s).unwrap();
}
// Force at least one $ followed by a digit and an ident char
#[test]
fn validate_err(s in r"[^\$]*?\$[0-9][a-zA-Z_]\PC*") {
validate::validate_replace(&s).unwrap_err();
}
}
fn replace(
look_for: impl Into<String>,
replace_with: impl Into<String>,
literal: bool,
flags: Option<&'static str>,
src: &'static str,
target: &'static str,
) {
let replacer = Replacer::new(
look_for.into(),
replace_with.into(),
literal,
flags.map(ToOwned::to_owned),
None,
)
.unwrap();
assert_eq!(
std::str::from_utf8(&replacer.replace(src.as_bytes())),
Ok(target)
);
}
#[test]
fn default_global() {
replace("a", "b", false, None, "aaa", "bbb");
}
#[test]
fn escaped_char_preservation() {
replace("a", "b", false, None, "a\\n", "b\\n");
}
#[test]
fn case_sensitive_default() {
replace("abc", "x", false, None, "abcABC", "xABC");
replace("abc", "x", true, None, "abcABC", "xABC");
}
#[test]
fn sanity_check_literal_replacements() {
replace("((special[]))", "x", true, None, "((special[]))y", "xy");
}
#[test]
fn unescape_regex_replacements() {
replace("test", r"\n", false, None, "testtest", "\n\n");
}
#[test]
fn no_unescape_literal_replacements() {
replace("test", r"\n", true, None, "testtest", r"\n\n");
}
#[test]
fn full_word_replace() {
replace("abc", "def", false, Some("w"), "abcd abc", "abcd def");
}

380
src/replacer/validate.rs Normal file
View file

@ -0,0 +1,380 @@
use std::{error::Error, fmt, str::CharIndices};
use ansi_term::{Color, Style};
#[derive(Debug)]
pub struct InvalidReplaceCapture {
original_replace: String,
invalid_ident: Span,
num_leading_digits: usize,
}
impl Error for InvalidReplaceCapture {}
// NOTE: This code is much more allocation heavy than it needs to be, but it's
// only displayed as a hard error to the user, so it's not a big deal
impl fmt::Display for InvalidReplaceCapture {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
#[derive(Clone, Copy)]
enum SpecialChar {
Newline,
CarriageReturn,
Tab,
}
impl SpecialChar {
fn new(c: char) -> Option<Self> {
match c {
'\n' => Some(Self::Newline),
'\r' => Some(Self::CarriageReturn),
'\t' => Some(Self::Tab),
_ => None,
}
}
/// Renders as the character from the "Control Pictures" block
///
/// https://en.wikipedia.org/wiki/Control_Pictures
fn render(self) -> char {
match self {
Self::Newline => '␊',
Self::CarriageReturn => '␍',
Self::Tab => '␉',
}
}
}
let Self {
original_replace,
invalid_ident,
num_leading_digits,
} = self;
// Build up the error to show the user
let mut formatted = String::new();
let mut arrows_start = Span::start_at(0);
let special = Style::new().bold();
let error = Style::from(Color::Red).bold();
for (byte_index, c) in original_replace.char_indices() {
let (prefix, suffix, text) = match SpecialChar::new(c) {
Some(c) => {
(Some(special.prefix()), Some(special.suffix()), c.render())
}
None => {
let (prefix, suffix) = if byte_index == invalid_ident.start
{
(Some(error.prefix()), None)
} else if byte_index
== invalid_ident.end.checked_sub(1).unwrap()
{
(None, Some(error.suffix()))
} else {
(None, None)
};
(prefix, suffix, c)
}
};
if let Some(prefix) = prefix {
formatted.push_str(&prefix.to_string());
}
formatted.push(text);
if let Some(suffix) = suffix {
formatted.push_str(&suffix.to_string());
}
if byte_index < invalid_ident.start {
// Assumes that characters have a base display width of 1. While
// that's not technically true, it's near impossible to do right
// since the specifics on text rendering is up to the user's
// terminal/font. This _does_ rely on variable-width characters
// like \n, \r, and \t getting converting to single character
// representations above
arrows_start.start += 1;
}
}
// This relies on all non-curly-braced capture chars being 1 byte
let arrows_span = arrows_start.end_offset(invalid_ident.len());
let mut arrows = " ".repeat(arrows_span.start);
arrows.push_str(&format!(
"{}",
Style::new().bold().paint("^".repeat(arrows_span.len()))
));
let ident = invalid_ident.slice(original_replace);
let (number, the_rest) = ident.split_at(*num_leading_digits);
let disambiguous = format!("${{{number}}}{the_rest}");
let error_message = format!(
"The numbered capture group `{}` in the replacement text is ambiguous.",
Style::new().bold().paint(format!("${}", number).to_string())
);
let hint_message = format!(
"{}: Use curly braces to disambiguate it `{}`.",
Style::from(Color::Blue).bold().paint("hint"),
Style::new().bold().paint(disambiguous)
);
writeln!(f, "{}", error_message)?;
writeln!(f, "{}", hint_message)?;
writeln!(f, "{}", formatted)?;
write!(f, "{}", arrows)
}
}
pub fn validate_replace(s: &str) -> Result<(), InvalidReplaceCapture> {
for ident in ReplaceCaptureIter::new(s) {
let mut char_it = ident.name.char_indices();
let (_, c) = char_it.next().unwrap();
if c.is_ascii_digit() {
for (i, c) in char_it {
if !c.is_ascii_digit() {
return Err(InvalidReplaceCapture {
original_replace: s.to_owned(),
invalid_ident: ident.span,
num_leading_digits: i,
});
}
}
}
}
Ok(())
}
#[derive(Clone, Copy, Debug)]
struct Span {
start: usize,
end: usize,
}
impl Span {
fn start_at(start: usize) -> SpanOpen {
SpanOpen { start }
}
fn new(start: usize, end: usize) -> Self {
// `<` instead of `<=` because `Span` is exclusive on the upper bound
assert!(start < end);
Self { start, end }
}
fn slice(self, s: &str) -> &str {
&s[self.start..self.end]
}
fn len(self) -> usize {
self.end - self.start
}
}
#[derive(Clone, Copy)]
struct SpanOpen {
start: usize,
}
impl SpanOpen {
fn end_at(self, end: usize) -> Span {
let Self { start } = self;
Span::new(start, end)
}
fn end_offset(self, offset: usize) -> Span {
assert_ne!(offset, 0);
let Self { start } = self;
self.end_at(start + offset)
}
}
#[derive(Debug)]
struct Capture<'rep> {
name: &'rep str,
span: Span,
}
impl<'rep> Capture<'rep> {
fn new(name: &'rep str, span: Span) -> Self {
Self { name, span }
}
}
/// An iterator over the capture idents in an interpolated replacement string
///
/// This code is adapted from the `regex` crate
/// <https://docs.rs/regex-automata/latest/src/regex_automata/util/interpolate.rs.html>
/// (hence the high quality doc comments).
struct ReplaceCaptureIter<'rep>(CharIndices<'rep>);
impl<'rep> ReplaceCaptureIter<'rep> {
fn new(s: &'rep str) -> Self {
Self(s.char_indices())
}
}
impl<'rep> Iterator for ReplaceCaptureIter<'rep> {
type Item = Capture<'rep>;
fn next(&mut self) -> Option<Self::Item> {
// Continually seek to `$` until we find one that has a capture group
loop {
let (start, _) = self.0.find(|(_, c)| *c == '$')?;
let replacement = self.0.as_str();
let rep = replacement.as_bytes();
let open_span = Span::start_at(start + 1);
let maybe_cap = match rep.first()? {
// Handle escaping of '$'.
b'$' => {
self.0.next().unwrap();
None
}
b'{' => find_cap_ref_braced(rep, open_span),
_ => find_cap_ref(rep, open_span),
};
if let Some(cap) = maybe_cap {
// Advance the inner iterator to consume the capture
let mut remaining_bytes = cap.name.len();
while remaining_bytes > 0 {
let (_, c) = self.0.next().unwrap();
remaining_bytes =
remaining_bytes.checked_sub(c.len_utf8()).unwrap();
}
return Some(cap);
}
}
}
}
/// Parses a possible reference to a capture group name in the given text,
/// starting at the beginning of `replacement`.
///
/// If no such valid reference could be found, None is returned.
fn find_cap_ref(rep: &[u8], open_span: SpanOpen) -> Option<Capture<'_>> {
if rep.is_empty() {
return None;
}
let mut cap_end = 0;
while rep.get(cap_end).copied().map_or(false, is_valid_cap_letter) {
cap_end += 1;
}
if cap_end == 0 {
return None;
}
// We just verified that the range 0..cap_end is valid ASCII, so it must
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
// check via an unchecked conversion or by parsing the number straight from
// &[u8].
let name = core::str::from_utf8(&rep[..cap_end])
.expect("valid UTF-8 capture name");
Some(Capture::new(name, open_span.end_offset(name.len())))
}
/// Looks for a braced reference, e.g., `${foo1}`. This then looks for a
/// closing brace and returns the capture reference within the brace.
fn find_cap_ref_braced(rep: &[u8], open_span: SpanOpen) -> Option<Capture<'_>> {
assert_eq!(b'{', rep[0]);
let mut cap_end = 1;
while rep.get(cap_end).map_or(false, |&b| b != b'}') {
cap_end += 1;
}
if !rep.get(cap_end).map_or(false, |&b| b == b'}') {
return None;
}
// When looking at braced names, we don't put any restrictions on the name,
// so it's possible it could be invalid UTF-8. But a capture group name
// can never be invalid UTF-8, so if we have invalid UTF-8, then we can
// safely return None.
let name = core::str::from_utf8(&rep[..cap_end + 1]).ok()?;
Some(Capture::new(name, open_span.end_offset(name.len())))
}
fn is_valid_cap_letter(b: u8) -> bool {
matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
#[test]
fn literal_dollar_sign() {
let replace = "$$0";
let mut cap_iter = ReplaceCaptureIter::new(replace);
assert!(cap_iter.next().is_none());
}
#[test]
fn wacky_captures() {
let replace =
"$foo $1 $1invalid ${1}valid ${valid} $__${__weird__}${${__}";
let cap_iter = ReplaceCaptureIter::new(replace);
let expecteds = &[
"foo",
"1",
"1invalid",
"{1}",
"{valid}",
"__",
"{__weird__}",
"{${__}",
];
for (&expected, cap) in expecteds.iter().zip(cap_iter) {
assert_eq!(expected, cap.name, "name didn't match");
assert_eq!(expected, cap.span.slice(replace), "span didn't match");
}
}
const INTERPOLATED_CAPTURE: &str = "<interpolated>";
fn upstream_interpolate(s: &str) -> String {
let mut dst = String::new();
regex_automata::util::interpolate::string(
s,
|_, dst| dst.push_str(INTERPOLATED_CAPTURE),
|_| Some(0),
&mut dst,
);
dst
}
fn our_interpolate(s: &str) -> String {
let mut after_last_write = 0;
let mut dst = String::new();
for cap in ReplaceCaptureIter::new(s) {
// This only iterates over the capture groups, so copy any text
// before the capture
// -1 here to exclude the `$` that starts a capture
dst.push_str(
&s[after_last_write..cap.span.start.checked_sub(1).unwrap()],
);
// Interpolate our capture
dst.push_str(INTERPOLATED_CAPTURE);
after_last_write = cap.span.end;
}
if after_last_write < s.len() {
// And now any text that was after the last capture
dst.push_str(&s[after_last_write..]);
}
// Handle escaping literal `$`s
dst.replace("$$", "$")
}
proptest! {
// `regex-automata` doesn't expose a way to iterate over replacement
// captures, but we can use our iterator to mimic interpolation, so that
// we can pit the two against each other
#[test]
fn interpolation_matches_upstream(s in r"\PC*(\$\PC*){0,5}") {
assert_eq!(our_interpolate(&s), upstream_interpolate(&s));
}
}
}

View file

@ -102,4 +102,96 @@ mod cli {
Ok(())
}
fn bad_replace_helper_styled(replace: &str) -> String {
let err = sd()
.args(["find", replace])
.write_stdin("stdin")
.unwrap_err();
String::from_utf8(err.as_output().unwrap().stderr.clone()).unwrap()
}
fn bad_replace_helper_plain(replace: &str) -> String {
let stderr = bad_replace_helper_styled(replace);
// TODO: no easy way to toggle off styling yet. Add a `--color <when>`
// flag, and respect things like `$NO_COLOR`. `ansi_term` is
// unmaintained, so we should migrate off of it anyways
console::AnsiCodeIterator::new(&stderr)
.filter_map(|(s, is_ansi)| (!is_ansi).then_some(s))
.collect()
}
#[test]
fn fixed_strings_ambiguous_replace_is_fine() {
sd().args([
"--fixed-strings",
"foo",
"inner_before $1fine inner_after",
])
.write_stdin("outer_before foo outer_after")
.assert()
.success()
.stdout("outer_before inner_before $1fine inner_after outer_after");
}
#[test]
fn ambiguous_replace_basic() {
let plain_stderr = bad_replace_helper_plain("before $1bad after");
insta::assert_snapshot!(plain_stderr, @r###"
error: The numbered capture group `$1` in the replacement text is ambiguous.
hint: Use curly braces to disambiguate it `${1}bad`.
before $1bad after
^^^^
"###);
}
#[test]
fn ambiguous_replace_variable_width() {
let plain_stderr = bad_replace_helper_plain("\r\n\t$1bad\r");
insta::assert_snapshot!(plain_stderr, @r###"
error: The numbered capture group `$1` in the replacement text is ambiguous.
hint: Use curly braces to disambiguate it `${1}bad`.
$1bad
^^^^
"###);
}
#[test]
fn ambiguous_replace_multibyte_char() {
let plain_stderr = bad_replace_helper_plain("😈$1bad😇");
insta::assert_snapshot!(plain_stderr, @r###"
error: The numbered capture group `$1` in the replacement text is ambiguous.
hint: Use curly braces to disambiguate it `${1}bad`.
😈$1bad😇
^^^^
"###);
}
#[test]
fn ambiguous_replace_issue_44() {
let plain_stderr =
bad_replace_helper_plain("$1Call $2($5, GetFM20ReturnKey(), $6)");
insta::assert_snapshot!(plain_stderr, @r###"
error: The numbered capture group `$1` in the replacement text is ambiguous.
hint: Use curly braces to disambiguate it `${1}Call`.
$1Call $2($5, GetFM20ReturnKey(), $6)
^^^^^
"###);
}
// NOTE: styled terminal output is platform dependent, so convert to a
// common format, in this case HTML, to check
#[test]
fn ambiguous_replace_ensure_styling() {
let styled_stderr = bad_replace_helper_styled("\t$1bad after");
let html_stderr =
ansi_to_html::convert(&styled_stderr, true, true).unwrap();
insta::assert_snapshot!(html_stderr, @r###"
<b><span style='color:#a00'>error</span></b>: The numbered capture group `<b>$1</b>` in the replacement text is ambiguous.
<b><span style='color:#00a'>hint</span></b>: Use curly braces to disambiguate it `<b>${1}bad</b>`.
<b></b>$<b><span style='color:#a00'>1bad</span></b> after
<b>^^^^</b>
"###);
}
}