replace the regex crate with the fancy-regex crate (#6227)

This commit is contained in:
Darren Schroeder 2022-08-04 14:51:02 -05:00 committed by GitHub
parent 606547ecb4
commit cdeb8de75d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 227 additions and 111 deletions

33
Cargo.lock generated
View file

@ -299,6 +299,21 @@ dependencies = [
"shlex",
]
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bit_field"
version = "0.10.1"
@ -1119,6 +1134,16 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fancy-regex"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0678ab2d46fa5195aaf59ad034c083d351377d4af57f3e073c074d0da3e3c766"
dependencies = [
"bit-set",
"regex",
]
[[package]]
name = "fastrand"
version = "1.8.0"
@ -2529,6 +2554,7 @@ version = "0.66.3"
dependencies = [
"chrono",
"crossterm 0.24.0",
"fancy-regex",
"fuzzy-matcher",
"is_executable",
"lazy_static",
@ -2544,7 +2570,6 @@ dependencies = [
"nu-test-support",
"nu-utils",
"reedline",
"regex",
"rstest",
"strip-ansi-escapes",
"sysinfo",
@ -2583,6 +2608,7 @@ dependencies = [
"dtparse",
"eml-parser",
"encoding_rs",
"fancy-regex",
"filesize",
"filetime",
"fs_extra",
@ -2624,7 +2650,6 @@ dependencies = [
"rand 0.8.5",
"rayon",
"reedline",
"regex",
"reqwest",
"roxmltree",
"rstest",
@ -2678,11 +2703,11 @@ dependencies = [
name = "nu-json"
version = "0.66.3"
dependencies = [
"fancy-regex",
"lazy_static",
"linked-hash-map",
"nu-path",
"num-traits",
"regex",
"serde",
"serde_json",
]
@ -2739,13 +2764,13 @@ dependencies = [
"byte-unit",
"chrono",
"chrono-humanize",
"fancy-regex",
"indexmap",
"miette 5.1.1",
"nu-json",
"nu-path",
"nu-utils",
"num-format",
"regex",
"serde",
"serde_json",
"sys-locale",

View file

@ -20,18 +20,18 @@ nu-utils = { path = "../nu-utils", version = "0.66.3" }
nu-ansi-term = "0.46.0"
nu-color-config = { path = "../nu-color-config", version = "0.66.3" }
reedline = { version = "0.9.0", features = ["bashisms", "sqlite"]}
crossterm = "0.24.0"
miette = { version = "5.1.0", features = ["fancy"] }
thiserror = "1.0.31"
fuzzy-matcher = "0.3.7"
chrono = "0.4.19"
crossterm = "0.24.0"
fancy-regex = "0.10.0"
fuzzy-matcher = "0.3.7"
is_executable = "1.0.1"
lazy_static = "1.4.0"
log = "0.4"
regex = "1.5.4"
miette = { version = "5.1.0", features = ["fancy"] }
strip-ansi-escapes = "0.1.1"
sysinfo = "0.24.1"
thiserror = "1.0.31"
[features]
plugin = []

View file

@ -5,6 +5,7 @@ use crate::{
util::{eval_source, get_guaranteed_cwd, report_error, report_error_new},
NuHighlighter, NuValidator, NushellPrompt,
};
use fancy_regex::Regex;
use lazy_static::lazy_static;
use log::{info, trace, warn};
use miette::{IntoDiagnostic, Result};
@ -18,7 +19,6 @@ use nu_protocol::{
Type, Value, VarId,
};
use reedline::{DefaultHinter, Emacs, SqliteBackedHistory, Vi};
use regex::Regex;
use std::io::{self, Write};
use std::{sync::atomic::Ordering, time::Instant};
use strip_ansi_escapes::strip;
@ -494,7 +494,7 @@ fn get_banner(engine_state: &mut EngineState, stack: &mut Stack) -> String {
let banner = format!(
r#"{} __ ,
{} .--()°'.' {}Welcome to {}Nushell{},
{} .--()°'.' {}Welcome to {}Nushell{},
{}'|, . ,' {}based on the {}nu{} language,
{} !_-(_\ {}where all data is structured!
@ -909,7 +909,7 @@ lazy_static! {
fn looks_like_path(orig: &str) -> bool {
#[cfg(windows)]
{
if DRIVE_PATH_REGEX.is_match(orig) {
if DRIVE_PATH_REGEX.is_match(orig).unwrap_or(false) {
return true;
}
}

View file

@ -41,6 +41,7 @@ digest = "0.10.0"
dtparse = "1.2.0"
eml-parser = "0.1.0"
encoding_rs = "0.8.30"
fancy-regex = "0.10.0"
filesize = "0.2.0"
filetime = "0.2.15"
fs_extra = "1.2.0"
@ -64,7 +65,6 @@ powierza-coefficient = "1.0.1"
quick-xml = "0.23.0"
rand = "0.8"
rayon = "1.5.1"
regex = "1.5.4"
reqwest = {version = "0.11", features = ["blocking", "json"] }
roxmltree = "0.14.0"
rust-embed = "6.3.0"

View file

@ -1,3 +1,4 @@
use fancy_regex::Regex;
use nu_ansi_term::{
Color::{Default, Red, White},
Style,
@ -11,7 +12,6 @@ use nu_protocol::{
ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};
use std::borrow::Borrow;
#[derive(Clone)]
pub struct Help;
@ -350,7 +350,7 @@ pub fn highlight_search_string(
string_style: &Style,
) -> Result<String, ShellError> {
let regex_string = format!("(?i){}", needle);
let regex = match regex::Regex::new(&regex_string) {
let regex = match Regex::new(&regex_string) {
Ok(regex) => regex,
Err(err) => {
return Err(ShellError::GenericError(
@ -367,21 +367,34 @@ pub fn highlight_search_string(
let mut highlighted = String::new();
for cap in regex.captures_iter(haystack) {
let start = match cap.get(0) {
Some(cap) => cap.start(),
None => 0,
};
let end = match cap.get(0) {
Some(cap) => cap.end(),
None => 0,
};
highlighted.push_str(
&string_style
.paint(&haystack[last_match_end..start])
.to_string(),
);
highlighted.push_str(&style.paint(&haystack[start..end]).to_string());
last_match_end = end;
match cap {
Ok(capture) => {
let start = match capture.get(0) {
Some(acap) => acap.start(),
None => 0,
};
let end = match capture.get(0) {
Some(acap) => acap.end(),
None => 0,
};
highlighted.push_str(
&string_style
.paint(&haystack[last_match_end..start])
.to_string(),
);
highlighted.push_str(&style.paint(&haystack[start..end]).to_string());
last_match_end = end;
}
Err(e) => {
return Err(ShellError::GenericError(
"Error with regular expression capture".into(),
e.to_string(),
None,
None,
Vec::new(),
));
}
}
}
highlighted.push_str(&string_style.paint(&haystack[last_match_end..]).to_string());

View file

@ -1,5 +1,4 @@
use super::{operations::Axis, NuDataFrame};
use nu_protocol::{ast::Operator, span, ShellError, Span, Spanned, Value};
use num::Zero;
use polars::prelude::{
@ -294,7 +293,7 @@ pub(super) fn compute_series_single_value(
compare_series_decimal(&lhs, *val, ChunkedArray::equal, lhs_span)
}
Value::String { val, .. } => {
let equal_pattern = format!("^{}$", regex::escape(val));
let equal_pattern = format!("^{}$", fancy_regex::escape(val));
contains_series_pat(&lhs, &equal_pattern, lhs_span)
}
Value::Date { val, .. } => {
@ -406,7 +405,7 @@ pub(super) fn compute_series_single_value(
},
Operator::StartsWith => match &right {
Value::String { val, .. } => {
let starts_with_pattern = format!("^{}", regex::escape(val));
let starts_with_pattern = format!("^{}", fancy_regex::escape(val));
contains_series_pat(&lhs, &starts_with_pattern, lhs_span)
}
_ => Err(ShellError::OperatorMismatch {
@ -419,7 +418,7 @@ pub(super) fn compute_series_single_value(
},
Operator::EndsWith => match &right {
Value::String { val, .. } => {
let ends_with_pattern = format!("{}$", regex::escape(val));
let ends_with_pattern = format!("{}$", fancy_regex::escape(val));
contains_series_pat(&lhs, &ends_with_pattern, lhs_span)
}
_ => Err(ShellError::OperatorMismatch {

View file

@ -1,4 +1,5 @@
use crate::help::highlight_search_string;
use fancy_regex::Regex;
use lscolors::Style as LsColors_Style;
use nu_ansi_term::{Color::Default, Style};
use nu_color_config::get_color_config;
@ -10,7 +11,6 @@ use nu_protocol::{
Signature, Span, SyntaxShape, Value,
};
use nu_utils::get_ls_colors;
use regex::Regex;
#[derive(Clone)]
pub struct Find;
@ -197,18 +197,26 @@ fn find_with_regex(
input.filter(
move |value| match value {
Value::String { val, .. } => re.is_match(val.as_str()) != invert,
Value::String { val, .. } => re.is_match(val.as_str()).unwrap_or(false) != invert,
Value::Record { cols: _, vals, .. } => {
let matches: Vec<bool> = vals
.iter()
.map(|v| re.is_match(v.into_string(" ", &config).as_str()) != invert)
.map(|v| {
re.is_match(v.into_string(" ", &config).as_str())
.unwrap_or(false)
!= invert
})
.collect();
matches.iter().any(|b| *b)
}
Value::List { vals, .. } => {
let matches: Vec<bool> = vals
.iter()
.map(|v| re.is_match(v.into_string(" ", &config).as_str()) != invert)
.map(|v| {
re.is_match(v.into_string(" ", &config).as_str())
.unwrap_or(false)
!= invert
})
.collect();
matches.iter().any(|b| *b)
}

View file

@ -1,4 +1,5 @@
use crate::formats::to::delimited::merge_descriptors;
use fancy_regex::Regex;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
@ -6,7 +7,6 @@ use nu_protocol::{
Category, Config, Example, IntoPipelineData, PipelineData, ShellError, Signature, Spanned,
SyntaxShape, Value,
};
use regex::Regex;
use rust_embed::RustEmbed;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

View file

@ -1,3 +1,4 @@
use fancy_regex::Regex;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
@ -5,7 +6,6 @@ use nu_protocol::{
Category, Example, ListStream, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape,
Value,
};
use regex::Regex;
#[derive(Clone)]
pub struct Parse;
@ -55,6 +55,61 @@ impl Command for Parse {
example: "echo \"hi there\" | parse -r '(?P<foo>\\w+) (?P<bar>\\w+)'",
result: Some(result),
},
Example {
description: "Parse a string using fancy-regex named capture group pattern",
example: "echo \"foo bar.\" | parse -r '\\s*(?<name>\\w+)(?=\\.)'",
result: Some(Value::List {
vals: vec![Value::Record {
cols: vec!["name".to_string()],
vals: vec![Value::test_string("bar")],
span: Span::test_data()
}],
span: Span::test_data(),
}),
},
Example {
description: "Parse a string using fancy-regex capture group pattern",
example: "echo \"foo! bar.\" | parse -r '(\\w+)(?=\\.)|(\\w+)(?=!)'",
result: Some(Value::List {
vals: vec![
Value::Record {
cols: vec!["Capture1".to_string(), "Capture2".to_string()],
vals: vec![Value::test_string(""), Value::test_string("foo")],
span: Span::test_data()
},
Value::Record {
cols: vec!["Capture1".to_string(), "Capture2".to_string()],
vals: vec![Value::test_string("bar"), Value::test_string("")],
span: Span::test_data(),
}],
span: Span::test_data(),
}),
},
Example {
description: "Parse a string using fancy-regex look behind pattern",
example: "echo \" @another(foo bar) \" | parse -r '\\s*(?<=[() ])(@\\w+)(\\([^)]*\\))?\\s*'",
result: Some(Value::List {
vals: vec![Value::Record {
cols: vec!["Capture1".to_string(), "Capture2".to_string()],
vals: vec![Value::test_string("@another"), Value::test_string("(foo bar)")],
span: Span::test_data()
}],
span: Span::test_data(),
}),
},
Example {
description: "Parse a string using fancy-regex look ahead atomic group pattern",
example: "echo \"abcd\" | parse -r '^a(bc(?=d)|b)cd$'",
result: Some(Value::List {
vals: vec![Value::Record {
cols: vec!["Capture1".to_string()],
vals: vec![Value::test_string("b")],
span: Span::test_data()
}],
span: Span::test_data(),
}),
},
]
}
@ -89,8 +144,15 @@ fn operate(
build_regex(&pattern_item, pattern_span)?
};
let regex_pattern =
Regex::new(&item_to_parse).map_err(|e| parse_regex_error(e, pattern_span))?;
let regex_pattern = Regex::new(&item_to_parse).map_err(|err| {
ShellError::GenericError(
"Error with regular expression".into(),
err.to_string(),
Some(pattern_span),
None,
Vec::new(),
)
})?;
let columns = column_names(&regex_pattern);
let mut parsed: Vec<Value> = Vec::new();
@ -102,9 +164,21 @@ fn operate(
for c in results {
let mut cols = Vec::with_capacity(columns.len());
let mut vals = Vec::with_capacity(c.len());
let captures = match c {
Ok(c) => c,
Err(e) => {
return Err(ShellError::GenericError(
"Error with regular expression captures".into(),
e.to_string(),
None,
None,
Vec::new(),
))
}
};
let mut vals = Vec::with_capacity(captures.len());
for (column_name, cap) in columns.iter().zip(c.iter().skip(1)) {
for (column_name, cap) in columns.iter().zip(captures.iter().skip(1)) {
let cap_string = cap.map(|v| v.as_str()).unwrap_or("").to_string();
cols.push(column_name.clone());
vals.push(Value::String {
@ -156,7 +230,7 @@ fn build_regex(input: &str, span: Span) -> Result<String, ShellError> {
}
if !before.is_empty() {
output.push_str(&regex::escape(&before));
output.push_str(&fancy_regex::escape(&before));
}
// Look for column as we're now at one
@ -202,35 +276,6 @@ fn column_names(regex: &Regex) -> Vec<String> {
.collect()
}
fn parse_regex_error(e: regex::Error, base_span: Span) -> ShellError {
match e {
regex::Error::Syntax(msg) => {
let mut lines = msg.lines();
let main_msg = lines
.next()
.map(|l| l.replace(':', ""))
.expect("invalid regex pattern");
let span = lines.nth(1).and_then(|l| l.find('^')).map(|space| {
let start = base_span.start + space - 3;
Span::new(start, start + 1)
});
let msg = lines
.next()
.and_then(|l| l.split(':').nth(1))
.map(|s| format!("{}: {}", main_msg, s.trim()));
match (msg, span) {
(Some(msg), Some(span)) => ShellError::DelimiterError(msg, span),
_ => ShellError::DelimiterError("Invalid regex".to_owned(), base_span),
}
}
_ => ShellError::DelimiterError("Invalid regex".to_owned(), base_span),
}
}
#[cfg(test)]
mod test {
use super::*;

View file

@ -1,3 +1,4 @@
use fancy_regex::Regex;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{Category, Example, PipelineData, ShellError, Signature, Span, Value};
@ -265,10 +266,13 @@ impl Count for Counter {
// use regex here because it can search for CRLF first and not duplicate the count
let line_ending_types = [CRLF, LF, CR, NEL, FF, LS, PS];
let pattern = &line_ending_types.join("|");
let newline_pattern = regex::Regex::new(pattern).expect("Unable to create regex");
let newline_pattern = Regex::new(pattern).expect("Unable to create regex");
let line_endings = newline_pattern
.find_iter(s)
.map(|f| f.as_str().to_string())
.map(|f| match f {
Ok(mat) => mat.as_str().to_string(),
Err(_) => "".to_string(),
})
.collect::<Vec<String>>();
let has_line_ending_suffix =

View file

@ -1,10 +1,10 @@
use fancy_regex::{NoExpand, Regex};
use nu_engine::CallExt;
use nu_protocol::{
ast::{Call, CellPath},
engine::{Command, EngineState, Stack},
Category, Example, PipelineData, ShellError, Signature, Span, Spanned, SyntaxShape, Value,
};
use regex::{NoExpand, Regex};
use std::sync::Arc;
struct Arguments {
@ -133,6 +133,23 @@ impl Command for SubCommand {
span: Span::test_data(),
}),
},
Example {
description: "Find and replace with fancy-regex",
example: r#"'a sucessful b' | str replace '\b([sS])uc(?:cs|s?)e(ed(?:ed|ing|s?)|ss(?:es|ful(?:ly)?|i(?:ons?|ve(?:ly)?)|ors?)?)\b' '${1}ucce$2'"#,
result: Some(Value::String {
val: "a successful b".to_string(),
span: Span::test_data(),
}),
},
Example {
description: "Find and replace with fancy-regex",
example: r#"'GHIKK-9+*' | str replace '[*[:xdigit:]+]' 'z'"#,
result: Some(Value::String {
val: "GHIKK-z+*".to_string(),
span: Span::test_data(),
}),
},
]
}
}

View file

@ -1,3 +1,4 @@
use fancy_regex::Regex;
use nu_engine::CallExt;
use nu_protocol::{
ast::{Call, CellPath},
@ -311,7 +312,7 @@ fn trim(s: &str, char_: Option<char>, closure_flags: &ClosureFlags) -> String {
// create a regex string that looks for 2 or more of each of these characters
let re_str = format!("{}{{2,}}", reg);
// create the regex
let re = regex::Regex::new(&re_str).expect("Error creating regular expression");
let re = Regex::new(&re_str).expect("Error creating regular expression");
// replace all multiple occurrences with single occurrences represented by r
let new_str = re.replace_all(&return_string, r.to_string());
// update the return string so the next loop has the latest changes

View file

@ -1,3 +1,12 @@
use fancy_regex::Regex;
use itertools::Itertools;
use nu_engine::env_to_strings;
use nu_engine::CallExt;
use nu_protocol::engine::{EngineState, Stack};
use nu_protocol::{ast::Call, engine::Command, ShellError, Signature, SyntaxShape, Value};
use nu_protocol::{Category, Example, ListStream, PipelineData, RawStream, Span, Spanned};
use nu_system::ForegroundProcess;
use pathdiff::diff_paths;
use std::collections::HashMap;
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
@ -5,18 +14,6 @@ use std::process::{Command as CommandSys, Stdio};
use std::sync::atomic::Ordering;
use std::sync::mpsc;
use nu_engine::env_to_strings;
use nu_protocol::engine::{EngineState, Stack};
use nu_protocol::{ast::Call, engine::Command, ShellError, Signature, SyntaxShape, Value};
use nu_protocol::{Category, Example, ListStream, PipelineData, RawStream, Span, Spanned};
use itertools::Itertools;
use nu_engine::CallExt;
use nu_system::ForegroundProcess;
use pathdiff::diff_paths;
use regex::Regex;
const OUTPUT_BUFFER_SIZE: usize = 1024;
const OUTPUT_BUFFERS_IN_FLIGHT: usize = 3;
@ -505,7 +502,7 @@ impl ExternalCommand {
fn has_unsafe_shell_characters(arg: &str) -> bool {
let re: Regex = Regex::new(r"[^\w@%+=:,./-]").expect("regex to be valid");
re.is_match(arg)
re.is_match(arg).unwrap_or(false)
}
fn shell_arg_escape(arg: &str) -> String {

View file

@ -184,7 +184,9 @@ mod regex {
"#
));
assert!(actual.err.contains("unclosed group"));
assert!(actual
.err
.contains("Opening parenthesis without closing parenthesis"));
})
}
}

View file

@ -13,11 +13,11 @@ preserve_order = ["linked-hash-map", "linked-hash-map/serde_impl"]
default = ["preserve_order"]
[dependencies]
serde = "1.0"
num-traits = "0.2.14"
regex = "^1.0"
fancy-regex = "0.10.0"
lazy_static = "1"
linked-hash-map = { version="0.5", optional=true }
num-traits = "0.2.14"
serde = "1.0"
[dev-dependencies]
nu-path = { path="../nu-path", version = "0.66.3" }

View file

@ -12,7 +12,7 @@ use serde::ser;
//use super::util::ParseNumber;
use regex::Regex;
use fancy_regex::Regex;
use lazy_static::lazy_static;

View file

@ -1,7 +1,7 @@
// FIXME: re-enable tests
/*
use nu_json::Value;
use regex::Regex;
use fancy_regex::Regex;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

View file

@ -11,19 +11,20 @@ version = "0.66.3"
[dependencies]
nu-utils = { path = "../nu-utils", version = "0.66.3" }
nu-path = { path = "../nu-path", version = "0.66.3" }
thiserror = "1.0.31"
miette = { version = "5.1.0", features = ["fancy"] }
serde = {version = "1.0.130", features = ["derive"]}
chrono = { version="0.4.19", features=["serde"] }
indexmap = { version="1.7", features=["serde-1"] }
chrono-humanize = "0.2.1"
byte-unit = "4.0.9"
serde_json = { version = "1.0", optional = true }
nu-json = { path = "../nu-json", version = "0.66.3" }
typetag = "0.1.8"
byte-unit = "4.0.9"
chrono = { version="0.4.19", features=["serde"] }
chrono-humanize = "0.2.1"
fancy-regex = "0.10.0"
indexmap = { version="1.7", features=["serde-1"] }
miette = { version = "5.1.0", features = ["fancy"] }
num-format = "0.4.0"
serde = {version = "1.0.130", features = ["derive"]}
serde_json = { version = "1.0", optional = true }
sys-locale = "0.2.0"
regex = "1.5.4"
thiserror = "1.0.31"
typetag = "0.1.8"
[features]
plugin = ["serde_json"]

View file

@ -13,11 +13,11 @@ use byte_unit::ByteUnit;
use chrono::{DateTime, Duration, FixedOffset};
use chrono_humanize::HumanTime;
pub use custom_value::CustomValue;
use fancy_regex::Regex;
pub use from_value::FromValue;
use indexmap::map::IndexMap;
use num_format::{Locale, ToFormattedString};
pub use range::*;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::{
borrow::Cow,
@ -2178,7 +2178,11 @@ impl Value {
.map_err(|e| ShellError::UnsupportedInput(format!("{e}"), *rhs_span))?;
let is_match = regex.is_match(lhs);
Ok(Value::Bool {
val: if invert { !is_match } else { is_match },
val: if invert {
!is_match.unwrap_or(false)
} else {
is_match.unwrap_or(true)
},
span,
})
}

View file

@ -63,12 +63,12 @@ fn where_not_works() -> TestResult {
#[test]
fn invalid_regex_fails() -> TestResult {
fail_test(r#"'foo' =~ '['"#, "regex parse error")
fail_test(r#"'foo' =~ '['"#, "Invalid character class")
}
#[test]
fn invalid_not_regex_fails() -> TestResult {
fail_test(r#"'foo' !~ '['"#, "regex parse error")
fail_test(r#"'foo' !~ '['"#, "Invalid character class")
}
#[test]