mirror of
https://github.com/nushell/nushell
synced 2025-01-15 22:54:16 +00:00
new command: into value
(#10427)
# Description This new command `into value` is a command that tries to infer the type of data you have in a table. It converts each cell to a string and then runs a set of regular expressions on that string. This was mostly cobbled together after looking at how polars does similar things. The regular expressions were taken straight from polars and tweaked. ### Before ```nushell ❯ [[col1 col2 col3 col4 col5 col6]; ["1" "two" "3.4" "true" "2023-08-10 14:07:17.922050800 -05:00" "2023-09-19"]] | update col1 {|r| $r.col1 | into int } | update col3 {|r| $r.col3 | into float } | update col4 {|r| $r.col4 | into bool } | update col5 {|r| $r.col5 | into datetime } | update col6 {|r| $r.col6 | into datetime } ╭#┬col1┬col2┬col3┬col4┬───col5────┬───col6────╮ │0│ 1│two │3.40│true│a month ago│8 hours ago│ ╰─┴────┴────┴────┴────┴───────────┴───────────╯ ``` or ```nushell ❯ [[col1 col2 col3 col4 col5 col6]; ["1" "two" "3.4" "true" "2023-08-10 14:07:17.922050800 -05:00" "2023-09-19"]] | into int col1 | into float col3 | into bool col4 | into datetime col5 col6 ╭#┬col1┬col2┬col3┬col4┬───col5────┬───col6────╮ │0│ 1│two │3.40│true│a month ago│8 hours ago│ ╰─┴────┴────┴────┴────┴───────────┴───────────╯ ``` ### After ```nushell ❯ [[col1 col2 col3 col4 col5 col6]; ["1" "two" "3.4" "true" "2023-08-10 14:07:17.922050800 -05:00" "2023-09-19"]] | into value ╭#┬col1┬col2┬col3┬col4┬───col5────┬───col6────╮ │0│ 1│two │3.40│true│a month ago│8 hours ago│ ╰─┴────┴────┴────┴────┴───────────┴───────────╯ ``` It's definitely not perfect. There are ways it will fail because the regular expressions do not work on all formats. My hope is that people will pick this up and add more regular expressions and if there are problems with the existing ones, change them. This is meant as a "starter command" with easy entry for newcomers that are looking to chip in and help out. Also, some tests probably need to be added to ensure what we have now doesn't break with updates. 
# User-Facing Changes <!-- List of all changes that impact the user experience here. This helps us keep track of breaking changes. --> # Tests + Formatting <!-- Don't forget to add tests that cover your changes. Make sure you've run and fixed any issues with these commands: - `cargo fmt --all -- --check` to check standard code formatting (`cargo fmt --all` applies these changes) - `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to check that you're using the standard code style - `cargo test --workspace` to check that all tests pass (on Windows make sure to [enable developer mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging)) - `cargo run -- -c "use std testing; testing run-tests --path crates/nu-std"` to run the tests for the standard library > **Note** > from `nushell` you can also use the `toolkit` as follows > ```bash > use toolkit.nu # or use an `env_change` hook to activate it automatically > toolkit check pr > ``` --> # After Submitting <!-- If your PR had any user-facing changes, update [the documentation](https://github.com/nushell/nushell.github.io) after the PR is merged, if necessary. This will help us keep the docs up to date. -->
This commit is contained in:
parent
29e809ad77
commit
4ae53d93fb
5 changed files with 490 additions and 13 deletions
24
Cargo.lock
generated
24
Cargo.lock
generated
|
@ -480,7 +480,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
|
checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
"regex-automata 0.3.0",
|
"regex-automata 0.3.8",
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -2397,9 +2397,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memchr"
|
name = "memchr"
|
||||||
version = "2.5.0"
|
version = "2.6.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memmap2"
|
name = "memmap2"
|
||||||
|
@ -4313,14 +4313,14 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "1.9.0"
|
version = "1.9.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
|
checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick",
|
"aho-corasick",
|
||||||
"memchr",
|
"memchr",
|
||||||
"regex-automata 0.3.0",
|
"regex-automata 0.3.8",
|
||||||
"regex-syntax 0.7.3",
|
"regex-syntax 0.7.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -4331,13 +4331,13 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex-automata"
|
name = "regex-automata"
|
||||||
version = "0.3.0"
|
version = "0.3.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
|
checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick",
|
"aho-corasick",
|
||||||
"memchr",
|
"memchr",
|
||||||
"regex-syntax 0.7.3",
|
"regex-syntax 0.7.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -4348,9 +4348,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex-syntax"
|
name = "regex-syntax"
|
||||||
version = "0.7.3"
|
version = "0.7.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
|
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "relative-path"
|
name = "relative-path"
|
||||||
|
|
|
@ -71,7 +71,7 @@ print-positions = "0.6"
|
||||||
quick-xml = "0.30"
|
quick-xml = "0.30"
|
||||||
rand = "0.8"
|
rand = "0.8"
|
||||||
rayon = "1.7"
|
rayon = "1.7"
|
||||||
regex = "1.7"
|
regex = "1.9.5"
|
||||||
roxmltree = "0.18"
|
roxmltree = "0.18"
|
||||||
rusqlite = { version = "0.29", features = ["bundled"], optional = true }
|
rusqlite = { version = "0.29", features = ["bundled"], optional = true }
|
||||||
same-file = "1.0"
|
same-file = "1.0"
|
||||||
|
|
|
@ -9,6 +9,7 @@ mod float;
|
||||||
mod int;
|
mod int;
|
||||||
mod record;
|
mod record;
|
||||||
mod string;
|
mod string;
|
||||||
|
mod value;
|
||||||
|
|
||||||
pub use self::bool::SubCommand as IntoBool;
|
pub use self::bool::SubCommand as IntoBool;
|
||||||
pub use self::filesize::SubCommand as IntoFilesize;
|
pub use self::filesize::SubCommand as IntoFilesize;
|
||||||
|
@ -21,3 +22,4 @@ pub use float::SubCommand as IntoFloat;
|
||||||
pub use int::SubCommand as IntoInt;
|
pub use int::SubCommand as IntoInt;
|
||||||
pub use record::SubCommand as IntoRecord;
|
pub use record::SubCommand as IntoRecord;
|
||||||
pub use string::SubCommand as IntoString;
|
pub use string::SubCommand as IntoString;
|
||||||
|
pub use value::IntoValue;
|
||||||
|
|
474
crates/nu-command/src/conversions/into/value.rs
Normal file
474
crates/nu-command/src/conversions/into/value.rs
Normal file
|
@ -0,0 +1,474 @@
|
||||||
|
use crate::parse_date_from_string;
|
||||||
|
use nu_engine::CallExt;
|
||||||
|
use nu_protocol::{
|
||||||
|
ast::Call,
|
||||||
|
engine::{Command, EngineState, Stack},
|
||||||
|
Category, Example, IntoInterruptiblePipelineData, PipelineData, PipelineIterator, ShellError,
|
||||||
|
Signature, Span, SyntaxShape, Type, Value,
|
||||||
|
};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use regex::{Regex, RegexBuilder};
|
||||||
|
use std::{collections::HashSet, iter::FromIterator};
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct IntoValue;
|
||||||
|
|
||||||
|
impl Command for IntoValue {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"into value"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("into value")
|
||||||
|
.input_output_types(vec![(Type::Table(vec![]), Type::Table(vec![]))])
|
||||||
|
.named(
|
||||||
|
"columns",
|
||||||
|
SyntaxShape::Table(vec![]),
|
||||||
|
"list of columns to update",
|
||||||
|
Some('c'),
|
||||||
|
)
|
||||||
|
.allow_variants_without_examples(true)
|
||||||
|
.category(Category::Filters)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"Infer nushell datatype for each cell."
|
||||||
|
}
|
||||||
|
|
||||||
|
fn examples(&self) -> Vec<Example> {
|
||||||
|
vec![
|
||||||
|
Example {
|
||||||
|
description: "Infer Nushell values for each cell.",
|
||||||
|
example: "$table | into value",
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
|
Example {
|
||||||
|
description: "Infer Nushell values for each cell in the given columns.",
|
||||||
|
example: "$table | into value -c [column1, column5]",
|
||||||
|
result: None,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(
|
||||||
|
&self,
|
||||||
|
engine_state: &EngineState,
|
||||||
|
stack: &mut Stack,
|
||||||
|
call: &Call,
|
||||||
|
input: PipelineData,
|
||||||
|
) -> Result<PipelineData, ShellError> {
|
||||||
|
let engine_state = engine_state.clone();
|
||||||
|
let metadata = input.metadata();
|
||||||
|
let ctrlc = engine_state.ctrlc.clone();
|
||||||
|
let span = call.head;
|
||||||
|
|
||||||
|
// the columns to update
|
||||||
|
let columns: Option<Value> = call.get_flag(&engine_state, stack, "columns")?;
|
||||||
|
let columns: Option<HashSet<String>> = match columns {
|
||||||
|
Some(val) => {
|
||||||
|
let cols = val
|
||||||
|
.as_list()?
|
||||||
|
.iter()
|
||||||
|
.map(|val| val.as_string())
|
||||||
|
.collect::<Result<Vec<String>, ShellError>>()?;
|
||||||
|
Some(HashSet::from_iter(cols))
|
||||||
|
}
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(UpdateCellIterator {
|
||||||
|
input: input.into_iter(),
|
||||||
|
columns,
|
||||||
|
span,
|
||||||
|
}
|
||||||
|
.into_pipeline_data(ctrlc)
|
||||||
|
.set_metadata(metadata))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct UpdateCellIterator {
|
||||||
|
input: PipelineIterator,
|
||||||
|
columns: Option<HashSet<String>>,
|
||||||
|
span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for UpdateCellIterator {
|
||||||
|
type Item = Value;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match self.input.next() {
|
||||||
|
Some(val) => {
|
||||||
|
if let Some(ref cols) = self.columns {
|
||||||
|
if !val.columns().iter().any(|c| cols.contains(c)) {
|
||||||
|
return Some(val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let span = val.span();
|
||||||
|
match val {
|
||||||
|
Value::Record { val, .. } => Some(Value::record(
|
||||||
|
val.into_iter()
|
||||||
|
.map(|(col, val)| match &self.columns {
|
||||||
|
Some(cols) if !cols.contains(&col) => (col, val),
|
||||||
|
_ => (
|
||||||
|
col,
|
||||||
|
match process_cell(val, span) {
|
||||||
|
Ok(val) => val,
|
||||||
|
Err(err) => Value::error(err, span),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
span,
|
||||||
|
)),
|
||||||
|
val => match process_cell(val, self.span) {
|
||||||
|
Ok(val) => Some(val),
|
||||||
|
Err(err) => Some(Value::error(err, self.span)),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This function will check each cell to see if it matches a regular expression
|
||||||
|
// for a particular datatype. If it does, it will convert the cell to that datatype.
|
||||||
|
fn process_cell(val: Value, span: Span) -> Result<Value, ShellError> {
|
||||||
|
// step 1: convert value to string
|
||||||
|
let val_str = val.as_string().unwrap_or_default();
|
||||||
|
|
||||||
|
// step 2: bounce string up against regexes
|
||||||
|
if BOOLEAN_RE.is_match(&val_str) {
|
||||||
|
let bval = val_str
|
||||||
|
.parse::<bool>()
|
||||||
|
.map_err(|_| ShellError::CantConvert {
|
||||||
|
to_type: "string".to_string(),
|
||||||
|
from_type: "bool".to_string(),
|
||||||
|
span,
|
||||||
|
help: Some(format!(
|
||||||
|
r#""{val_str}" does not represent a valid boolean value"#
|
||||||
|
)),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Value::bool(bval, span))
|
||||||
|
} else if FLOAT_RE.is_match(&val_str) {
|
||||||
|
let fval = val_str
|
||||||
|
.parse::<f64>()
|
||||||
|
.map_err(|_| ShellError::CantConvert {
|
||||||
|
to_type: "string".to_string(),
|
||||||
|
from_type: "float".to_string(),
|
||||||
|
span,
|
||||||
|
help: Some(format!(
|
||||||
|
r#""{val_str}" does not represent a valid floating point value"#
|
||||||
|
)),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Value::float(fval, span))
|
||||||
|
} else if INTEGER_RE.is_match(&val_str) {
|
||||||
|
let ival = val_str
|
||||||
|
.parse::<i64>()
|
||||||
|
.map_err(|_| ShellError::CantConvert {
|
||||||
|
to_type: "string".to_string(),
|
||||||
|
from_type: "int".to_string(),
|
||||||
|
span,
|
||||||
|
help: Some(format!(
|
||||||
|
r#""{val_str}" does not represent a valid integer value"#
|
||||||
|
)),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Value::int(ival, span))
|
||||||
|
} else if INTEGER_WITH_DELIMS_RE.is_match(&val_str) {
|
||||||
|
let mut val_str = val_str;
|
||||||
|
val_str.retain(|x| !['_', ','].contains(&x));
|
||||||
|
|
||||||
|
let ival = val_str
|
||||||
|
.parse::<i64>()
|
||||||
|
.map_err(|_| ShellError::CantConvert {
|
||||||
|
to_type: "string".to_string(),
|
||||||
|
from_type: "int".to_string(),
|
||||||
|
span,
|
||||||
|
help: Some(format!(
|
||||||
|
r#""{val_str}" does not represent a valid integer value"#
|
||||||
|
)),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Value::int(ival, span))
|
||||||
|
} else if DATETIME_DMY_RE.is_match(&val_str) {
|
||||||
|
let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
|
||||||
|
to_type: "date".to_string(),
|
||||||
|
from_type: "string".to_string(),
|
||||||
|
span,
|
||||||
|
help: Some(format!(
|
||||||
|
r#""{val_str}" does not represent a valid DATETIME_MDY_RE value"#
|
||||||
|
)),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Value::date(dt, span))
|
||||||
|
} else if DATETIME_YMD_RE.is_match(&val_str) {
|
||||||
|
let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
|
||||||
|
to_type: "date".to_string(),
|
||||||
|
from_type: "string".to_string(),
|
||||||
|
span,
|
||||||
|
help: Some(format!(
|
||||||
|
r#""{val_str}" does not represent a valid DATETIME_YMD_RE value"#
|
||||||
|
)),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Value::date(dt, span))
|
||||||
|
} else if DATETIME_YMDZ_RE.is_match(&val_str) {
|
||||||
|
let dt = parse_date_from_string(&val_str, span).map_err(|_| ShellError::CantConvert {
|
||||||
|
to_type: "date".to_string(),
|
||||||
|
from_type: "string".to_string(),
|
||||||
|
span,
|
||||||
|
help: Some(format!(
|
||||||
|
r#""{val_str}" does not represent a valid DATETIME_YMDZ_RE value"#
|
||||||
|
)),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(Value::date(dt, span))
|
||||||
|
} else {
|
||||||
|
// If we don't know what it is, just return whatever it was passed in as
|
||||||
|
Ok(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// region: datatype regexes
|
||||||
|
const DATETIME_DMY_PATTERN: &str = r#"(?x)
|
||||||
|
^
|
||||||
|
['"]? # optional quotes
|
||||||
|
(?:\d{1,2}) # day
|
||||||
|
[-/] # separator
|
||||||
|
(?P<month>[01]?\d{1}) # month
|
||||||
|
[-/] # separator
|
||||||
|
(?:\d{4,}) # year
|
||||||
|
(?:
|
||||||
|
[T\ ] # separator
|
||||||
|
(?:\d{2}) # hour
|
||||||
|
:? # separator
|
||||||
|
(?:\d{2}) # minute
|
||||||
|
(?:
|
||||||
|
:? # separator
|
||||||
|
(?:\d{2}) # second
|
||||||
|
(?:
|
||||||
|
\.(?:\d{1,9}) # subsecond
|
||||||
|
)?
|
||||||
|
)?
|
||||||
|
)?
|
||||||
|
['"]? # optional quotes
|
||||||
|
$
|
||||||
|
"#;
|
||||||
|
|
||||||
|
static DATETIME_DMY_RE: Lazy<Regex> =
|
||||||
|
Lazy::new(|| Regex::new(DATETIME_DMY_PATTERN).expect("datetime_dmy_pattern should be valid"));
|
||||||
|
const DATETIME_YMD_PATTERN: &str = r#"(?x)
|
||||||
|
^
|
||||||
|
['"]? # optional quotes
|
||||||
|
(?:\d{4,}) # year
|
||||||
|
[-/] # separator
|
||||||
|
(?P<month>[01]?\d{1}) # month
|
||||||
|
[-/] # separator
|
||||||
|
(?:\d{1,2}) # day
|
||||||
|
(?:
|
||||||
|
[T\ ] # separator
|
||||||
|
(?:\d{2}) # hour
|
||||||
|
:? # separator
|
||||||
|
(?:\d{2}) # minute
|
||||||
|
(?:
|
||||||
|
:? # separator
|
||||||
|
(?:\d{2}) # seconds
|
||||||
|
(?:
|
||||||
|
\.(?:\d{1,9}) # subsecond
|
||||||
|
)?
|
||||||
|
)?
|
||||||
|
)?
|
||||||
|
['"]? # optional quotes
|
||||||
|
$
|
||||||
|
"#;
|
||||||
|
static DATETIME_YMD_RE: Lazy<Regex> =
|
||||||
|
Lazy::new(|| Regex::new(DATETIME_YMD_PATTERN).expect("datetime_ymd_pattern should be valid"));
|
||||||
|
//2023-03-24 16:44:17.865147299 -05:00
|
||||||
|
const DATETIME_YMDZ_PATTERN: &str = r#"(?x)
|
||||||
|
^
|
||||||
|
['"]? # optional quotes
|
||||||
|
(?:\d{4,}) # year
|
||||||
|
[-/] # separator
|
||||||
|
(?P<month>[01]?\d{1}) # month
|
||||||
|
[-/] # separator
|
||||||
|
(?:\d{1,2}) # day
|
||||||
|
[T\ ] # separator
|
||||||
|
(?:\d{2}) # hour
|
||||||
|
:? # separator
|
||||||
|
(?:\d{2}) # minute
|
||||||
|
(?:
|
||||||
|
:? # separator
|
||||||
|
(?:\d{2}) # second
|
||||||
|
(?:
|
||||||
|
\.(?:\d{1,9}) # subsecond
|
||||||
|
)?
|
||||||
|
)?
|
||||||
|
\s? # optional space
|
||||||
|
(?:
|
||||||
|
# offset (e.g. +01:00)
|
||||||
|
[+-](?:\d{2})
|
||||||
|
:?
|
||||||
|
(?:\d{2})
|
||||||
|
# or Zulu suffix
|
||||||
|
|Z
|
||||||
|
)
|
||||||
|
['"]? # optional quotes
|
||||||
|
$
|
||||||
|
"#;
|
||||||
|
static DATETIME_YMDZ_RE: Lazy<Regex> =
|
||||||
|
Lazy::new(|| Regex::new(DATETIME_YMDZ_PATTERN).expect("datetime_ymdz_pattern should be valid"));
|
||||||
|
|
||||||
|
static FLOAT_RE: Lazy<Regex> = Lazy::new(|| {
|
||||||
|
Regex::new(r"^\s*[-+]?((\d*\.\d+)([eE][-+]?\d+)?|inf|NaN|(\d+)[eE][-+]?\d+|\d+\.)$")
|
||||||
|
.expect("float pattern should be valid")
|
||||||
|
});
|
||||||
|
|
||||||
|
/// Matches a run of digits with an optional leading minus sign, after
/// optional leading whitespace.
static INTEGER_RE: Lazy<Regex> = Lazy::new(|| {
    let compiled = Regex::new(r"^\s*-?(\d+)$");
    compiled.expect("integer pattern should be valid")
});
|
||||||
|
|
||||||
|
static INTEGER_WITH_DELIMS_RE: Lazy<Regex> = Lazy::new(|| {
|
||||||
|
Regex::new(r"^\s*-?(\d{1,3}([,_]\d{3})+)$")
|
||||||
|
.expect("integer with delimiters pattern should be valid")
|
||||||
|
});
|
||||||
|
|
||||||
|
static BOOLEAN_RE: Lazy<Regex> = Lazy::new(|| {
|
||||||
|
RegexBuilder::new(r"^\s*(true)$|^(false)$")
|
||||||
|
.case_insensitive(true)
|
||||||
|
.build()
|
||||||
|
.expect("boolean pattern should be valid")
|
||||||
|
});
|
||||||
|
// endregion:
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that `re` matches every string in `inputs`.
    fn assert_all_match(re: &Regex, inputs: &[&str]) {
        for input in inputs {
            assert!(re.is_match(input), "expected {input:?} to match");
        }
    }

    /// Asserts that `re` matches none of the strings in `inputs`.
    fn assert_none_match(re: &Regex, inputs: &[&str]) {
        for input in inputs {
            assert!(!re.is_match(input), "expected {input:?} not to match");
        }
    }

    #[test]
    fn test_examples() {
        use crate::test_examples;

        test_examples(IntoValue {})
    }

    #[test]
    fn test_float_parse() {
        // The regex should work on all these but nushell's float parser is more strict
        assert_all_match(
            &FLOAT_RE,
            &[
                "0.1",
                "3.0",
                "3.00001",
                "-9.9990e-003",
                "9.9990e+003",
                "9.9990E+003",
                "9.9990E+003",
                ".5",
                "2.5E-10",
                "2.5e10",
                "NaN",
                "-NaN",
                "-inf",
                "inf",
                "-7e-05",
                "7e-05",
                "+7e+05",
            ],
        );
    }

    #[test]
    fn test_int_parse() {
        assert_all_match(
            &INTEGER_RE,
            &[
                "0",
                "1",
                "10",
                "100",
                "1000",
                "10000",
                "100000",
                "1000000",
                "10000000",
                "100000000",
                "1000000000",
                "10000000000",
                "100000000000",
            ],
        );
        assert_all_match(
            &INTEGER_WITH_DELIMS_RE,
            &[
                "1_000",
                "10_000",
                "100_000",
                "1_000_000",
                "10_000_000",
                "100_000_000",
                "1_000_000_000",
                "10_000_000_000",
                "100_000_000_000",
                "1,000",
                "10,000",
                "100,000",
                "1,000,000",
                "10,000,000",
                "100,000,000",
                "1,000,000,000",
                "10,000,000,000",
            ],
        );
    }

    #[test]
    fn test_bool_parse() {
        assert_all_match(&BOOLEAN_RE, &["true", "false"]);
        assert_none_match(&BOOLEAN_RE, &["1", "0"]);
    }

    #[test]
    fn test_datetime_ymdz_pattern() {
        assert_all_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00Z",
                "2022-01-01T00:00:00.123456789Z",
                "2022-01-01T00:00:00+01:00",
                "2022-01-01T00:00:00.123456789+01:00",
                "2022-01-01T00:00:00-01:00",
                "2022-01-01T00:00:00.123456789-01:00",
                "'2022-01-01T00:00:00Z'",
            ],
        );

        assert_none_match(
            &DATETIME_YMDZ_RE,
            &[
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.",
                "2022-01-01T00:00:00.123456789",
                "2022-01-01T00:00:00+01",
                "2022-01-01T00:00:00+01:0",
                "2022-01-01T00:00:00+1:00",
                "2022-01-01T00:00:00.123456789+01",
                "2022-01-01T00:00:00.123456789+01:0",
                "2022-01-01T00:00:00.123456789+1:00",
                "2022-01-01T00:00:00-01",
                "2022-01-01T00:00:00-01:0",
                "2022-01-01T00:00:00-1:00",
                "2022-01-01T00:00:00.123456789-01",
                "2022-01-01T00:00:00.123456789-01:0",
                "2022-01-01T00:00:00.123456789-1:00",
            ],
        );
    }

    #[test]
    fn test_datetime_ymd_pattern() {
        assert_all_match(
            &DATETIME_YMD_RE,
            &[
                "2022-01-01",
                "2022/01/01",
                "2022-01-01T00:00:00",
                "2022-01-01T00:00:00.000000000",
                "'2022-01-01'",
            ],
        );

        // The regex isn't this specific, but it would be nice if it were
        // assert!(!DATETIME_YMD_RE.is_match("2022-13-01"));
        // assert!(!DATETIME_YMD_RE.is_match("2022-01-32"));
        // assert!(!DATETIME_YMD_RE.is_match("2022-01-01T24:00:00"));
        // assert!(!DATETIME_YMD_RE.is_match("2022-01-01T00:60:00"));
        // assert!(!DATETIME_YMD_RE.is_match("2022-01-01T00:00:60"));
        assert_none_match(&DATETIME_YMD_RE, &["2022-01-01T00:00:00.0000000000"]);
    }

    #[test]
    fn test_datetime_dmy_pattern() {
        assert_all_match(
            &DATETIME_DMY_RE,
            &["31-12-2021", "01/01/2022", "15-06-2023 12:30"],
        );
        assert_none_match(
            &DATETIME_DMY_RE,
            &["2022-13-01", "2022-01-32", "2022-01-01 24:00"],
        );
    }
}
|
|
@ -293,6 +293,7 @@ pub fn add_shell_command_context(mut engine_state: EngineState) -> EngineState {
|
||||||
IntoInt,
|
IntoInt,
|
||||||
IntoRecord,
|
IntoRecord,
|
||||||
IntoString,
|
IntoString,
|
||||||
|
IntoValue,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Env
|
// Env
|
||||||
|
|
Loading…
Reference in a new issue