mirror of
https://github.com/nushell/nushell
synced 2025-01-12 13:19:01 +00:00
Infer types from regular delimited plain text unstructured files. (#1494)
* Infer types from regular delimited plain text unstructured files. * Nothing resolves to an empty string.
This commit is contained in:
parent
d8c4565413
commit
b36d21e76f
20 changed files with 751 additions and 315 deletions
|
@ -1,3 +1,3 @@
|
||||||
[build]
|
[build]
|
||||||
|
|
||||||
#rustflags = ["--cfg", "coloring_in_tokens"]
|
#rustflags = ["--cfg", "data_processing_primitives"]
|
||||||
|
|
|
@ -1,42 +1,12 @@
|
||||||
use crate::prelude::*;
|
use crate::prelude::*;
|
||||||
use csv::{ErrorKind, ReaderBuilder};
|
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{Primitive, ReturnSuccess, TaggedDictBuilder, UntaggedValue, Value};
|
use nu_parser::hir::syntax_shape::{ExpandContext, SignatureRegistry};
|
||||||
|
use nu_parser::utils::{parse_line_with_separator as parse, LineSeparatedShape};
|
||||||
|
use nu_parser::TokensIterator;
|
||||||
|
use nu_protocol::{ReturnSuccess, Signature, TaggedDictBuilder, UntaggedValue, Value};
|
||||||
|
use nu_source::nom_input;
|
||||||
|
|
||||||
fn from_delimited_string_to_value(
|
use derive_new::new;
|
||||||
s: String,
|
|
||||||
headerless: bool,
|
|
||||||
separator: char,
|
|
||||||
tag: impl Into<Tag>,
|
|
||||||
) -> Result<Value, csv::Error> {
|
|
||||||
let mut reader = ReaderBuilder::new()
|
|
||||||
.has_headers(!headerless)
|
|
||||||
.delimiter(separator as u8)
|
|
||||||
.from_reader(s.as_bytes());
|
|
||||||
let tag = tag.into();
|
|
||||||
|
|
||||||
let headers = if headerless {
|
|
||||||
(1..=reader.headers()?.len())
|
|
||||||
.map(|i| format!("Column{}", i))
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
} else {
|
|
||||||
reader.headers()?.iter().map(String::from).collect()
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut rows = vec![];
|
|
||||||
for row in reader.records() {
|
|
||||||
let mut tagged_row = TaggedDictBuilder::new(&tag);
|
|
||||||
for (value, header) in row?.iter().zip(headers.iter()) {
|
|
||||||
tagged_row.insert_value(
|
|
||||||
header,
|
|
||||||
UntaggedValue::Primitive(Primitive::String(String::from(value))).into_value(&tag),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
rows.push(tagged_row.into_value());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(UntaggedValue::Table(rows).into_value(&tag))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn from_delimited_data(
|
pub fn from_delimited_data(
|
||||||
headerless: bool,
|
headerless: bool,
|
||||||
|
@ -50,19 +20,20 @@ pub fn from_delimited_data(
|
||||||
let concat_string = input.collect_string(name_tag.clone()).await?;
|
let concat_string = input.collect_string(name_tag.clone()).await?;
|
||||||
|
|
||||||
match from_delimited_string_to_value(concat_string.item, headerless, sep, name_tag.clone()) {
|
match from_delimited_string_to_value(concat_string.item, headerless, sep, name_tag.clone()) {
|
||||||
Ok(x) => match x {
|
Ok(rows) => {
|
||||||
|
for row in rows {
|
||||||
|
match row {
|
||||||
Value { value: UntaggedValue::Table(list), .. } => {
|
Value { value: UntaggedValue::Table(list), .. } => {
|
||||||
for l in list {
|
for l in list {
|
||||||
yield ReturnSuccess::value(l);
|
yield ReturnSuccess::value(l);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
x => yield ReturnSuccess::value(x),
|
x => yield ReturnSuccess::value(x),
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
let line_one = match pretty_csv_error(err) {
|
let line_one = format!("Could not parse as {}", format_name);
|
||||||
Some(pretty) => format!("Could not parse as {} ({})", format_name,pretty),
|
|
||||||
None => format!("Could not parse as {}", format_name),
|
|
||||||
};
|
|
||||||
let line_two = format!("input cannot be parsed as {}", format_name);
|
let line_two = format!("input cannot be parsed as {}", format_name);
|
||||||
yield Err(ShellError::labeled_error_with_secondary(
|
yield Err(ShellError::labeled_error_with_secondary(
|
||||||
line_one,
|
line_one,
|
||||||
|
@ -78,25 +49,121 @@ pub fn from_delimited_data(
|
||||||
Ok(stream.to_output_stream())
|
Ok(stream.to_output_stream())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pretty_csv_error(err: csv::Error) -> Option<String> {
|
#[derive(Debug, Clone, new)]
|
||||||
match err.kind() {
|
pub struct EmptyRegistry {
|
||||||
ErrorKind::UnequalLengths {
|
#[new(default)]
|
||||||
pos,
|
signatures: indexmap::IndexMap<String, Signature>,
|
||||||
expected_len,
|
}
|
||||||
len,
|
|
||||||
} => {
|
impl EmptyRegistry {}
|
||||||
if let Some(pos) = pos {
|
|
||||||
Some(format!(
|
impl SignatureRegistry for EmptyRegistry {
|
||||||
"Line {}: expected {} fields, found {}",
|
fn has(&self, _name: &str) -> bool {
|
||||||
pos.line(),
|
false
|
||||||
expected_len,
|
|
||||||
len
|
|
||||||
))
|
|
||||||
} else {
|
|
||||||
Some(format!("Expected {} fields, found {}", expected_len, len))
|
|
||||||
}
|
}
|
||||||
|
fn get(&self, _name: &str) -> Option<Signature> {
|
||||||
|
None
|
||||||
}
|
}
|
||||||
ErrorKind::Seek => Some("Internal error while parsing csv".to_string()),
|
fn clone_box(&self) -> Box<dyn SignatureRegistry> {
|
||||||
_ => None,
|
Box::new(self.clone())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn from_delimited_string_to_value(
|
||||||
|
s: String,
|
||||||
|
headerless: bool,
|
||||||
|
sep: char,
|
||||||
|
tag: impl Into<Tag>,
|
||||||
|
) -> Result<Vec<Value>, ShellError> {
|
||||||
|
let tag = tag.into();
|
||||||
|
|
||||||
|
let mut entries = s.lines();
|
||||||
|
|
||||||
|
let mut fields = vec![];
|
||||||
|
let mut out = vec![];
|
||||||
|
|
||||||
|
if let Some(first_entry) = entries.next() {
|
||||||
|
let tokens = match parse(&sep.to_string(), nom_input(first_entry)) {
|
||||||
|
Ok((_, tokens)) => tokens,
|
||||||
|
Err(err) => return Err(ShellError::parse_error(err)),
|
||||||
|
};
|
||||||
|
|
||||||
|
let tokens_span = tokens.span;
|
||||||
|
let source: nu_source::Text = tokens_span.slice(&first_entry).into();
|
||||||
|
|
||||||
|
if !headerless {
|
||||||
|
fields = tokens
|
||||||
|
.item
|
||||||
|
.iter()
|
||||||
|
.filter(|token| !token.is_separator())
|
||||||
|
.map(|field| field.source(&source).to_string())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
}
|
||||||
|
|
||||||
|
let registry = Box::new(EmptyRegistry::new());
|
||||||
|
let ctx = ExpandContext::new(registry, &source, None);
|
||||||
|
|
||||||
|
let mut iterator = TokensIterator::new(&tokens.item, ctx, tokens_span);
|
||||||
|
let (results, tokens_identified) = iterator.expand(LineSeparatedShape);
|
||||||
|
let results = results?;
|
||||||
|
|
||||||
|
let mut row = TaggedDictBuilder::new(&tag);
|
||||||
|
|
||||||
|
if headerless {
|
||||||
|
let fallback_columns = (1..=tokens_identified)
|
||||||
|
.map(|i| format!("Column{}", i))
|
||||||
|
.collect::<Vec<String>>();
|
||||||
|
|
||||||
|
for (idx, field) in results.into_iter().enumerate() {
|
||||||
|
let key = if headerless {
|
||||||
|
&fallback_columns[idx]
|
||||||
|
} else {
|
||||||
|
&fields[idx]
|
||||||
|
};
|
||||||
|
|
||||||
|
row.insert_value(key, field.into_value(&tag));
|
||||||
|
}
|
||||||
|
|
||||||
|
out.push(row.into_value())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for entry in entries {
|
||||||
|
let tokens = match parse(&sep.to_string(), nom_input(entry)) {
|
||||||
|
Ok((_, tokens)) => tokens,
|
||||||
|
Err(err) => return Err(ShellError::parse_error(err)),
|
||||||
|
};
|
||||||
|
let tokens_span = tokens.span;
|
||||||
|
|
||||||
|
let source: nu_source::Text = tokens_span.slice(&entry).into();
|
||||||
|
let registry = Box::new(EmptyRegistry::new());
|
||||||
|
let ctx = ExpandContext::new(registry, &source, None);
|
||||||
|
|
||||||
|
let mut iterator = TokensIterator::new(&tokens.item, ctx, tokens_span);
|
||||||
|
let (results, tokens_identified) = iterator.expand(LineSeparatedShape);
|
||||||
|
let results = results?;
|
||||||
|
|
||||||
|
let mut row = TaggedDictBuilder::new(&tag);
|
||||||
|
|
||||||
|
let fallback_columns = (1..=tokens_identified)
|
||||||
|
.map(|i| format!("Column{}", i))
|
||||||
|
.collect::<Vec<String>>();
|
||||||
|
|
||||||
|
for (idx, field) in results.into_iter().enumerate() {
|
||||||
|
let key = if headerless {
|
||||||
|
&fallback_columns[idx]
|
||||||
|
} else {
|
||||||
|
match fields.get(idx) {
|
||||||
|
Some(key) => key,
|
||||||
|
None => &fallback_columns[idx],
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
row.insert_value(key, field.into_value(&tag));
|
||||||
|
}
|
||||||
|
|
||||||
|
out.push(row.into_value())
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(out)
|
||||||
|
}
|
||||||
|
|
|
@ -140,6 +140,7 @@ fn to_string_tagged_value(v: &Value) -> Result<String, ShellError> {
|
||||||
| UntaggedValue::Primitive(Primitive::Path(_))
|
| UntaggedValue::Primitive(Primitive::Path(_))
|
||||||
| UntaggedValue::Primitive(Primitive::Int(_)) => as_string(v),
|
| UntaggedValue::Primitive(Primitive::Int(_)) => as_string(v),
|
||||||
UntaggedValue::Primitive(Primitive::Date(d)) => Ok(d.to_string()),
|
UntaggedValue::Primitive(Primitive::Date(d)) => Ok(d.to_string()),
|
||||||
|
UntaggedValue::Primitive(Primitive::Nothing) => Ok(String::new()),
|
||||||
UntaggedValue::Table(_) => Ok(String::from("[Table]")),
|
UntaggedValue::Table(_) => Ok(String::from("[Table]")),
|
||||||
UntaggedValue::Row(_) => Ok(String::from("[Row]")),
|
UntaggedValue::Row(_) => Ok(String::from("[Row]")),
|
||||||
_ => Err(ShellError::labeled_error(
|
_ => Err(ShellError::labeled_error(
|
||||||
|
|
|
@ -73,8 +73,36 @@ fn table_to_csv_text_skipping_headers_after_conversion() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn from_csv_text_to_table() {
|
fn infers_types() {
|
||||||
Playground::setup("filter_from_csv_test_1", |dirs, sandbox| {
|
Playground::setup("filter_from_csv_test_1", |dirs, sandbox| {
|
||||||
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
|
"los_cuatro_mosqueteros.csv",
|
||||||
|
r#"
|
||||||
|
first_name,last_name,rusty_luck
|
||||||
|
Andrés,Robalino,1,d
|
||||||
|
Jonathan,Turner,1,d
|
||||||
|
Yehuda,Katz,1,d
|
||||||
|
Jason,Gedge,1,d
|
||||||
|
"#,
|
||||||
|
)]);
|
||||||
|
|
||||||
|
let actual = nu!(
|
||||||
|
cwd: dirs.test(), pipeline(
|
||||||
|
r#"
|
||||||
|
open los_cuatro_mosqueteros.csv
|
||||||
|
| where rusty_luck > 0
|
||||||
|
| count
|
||||||
|
| echo $it
|
||||||
|
"#
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(actual, "4");
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn from_csv_text_to_table() {
|
||||||
|
Playground::setup("filter_from_csv_test_2", |dirs, sandbox| {
|
||||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
"los_tres_caballeros.txt",
|
"los_tres_caballeros.txt",
|
||||||
r#"
|
r#"
|
||||||
|
@ -102,7 +130,7 @@ fn from_csv_text_to_table() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn from_csv_text_with_separator_to_table() {
|
fn from_csv_text_with_separator_to_table() {
|
||||||
Playground::setup("filter_from_csv_test_2", |dirs, sandbox| {
|
Playground::setup("filter_from_csv_test_3", |dirs, sandbox| {
|
||||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
"los_tres_caballeros.txt",
|
"los_tres_caballeros.txt",
|
||||||
r#"
|
r#"
|
||||||
|
@ -130,7 +158,7 @@ fn from_csv_text_with_separator_to_table() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn from_csv_text_with_tab_separator_to_table() {
|
fn from_csv_text_with_tab_separator_to_table() {
|
||||||
Playground::setup("filter_from_csv_test_3", |dirs, sandbox| {
|
Playground::setup("filter_from_csv_test_4", |dirs, sandbox| {
|
||||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
"los_tres_caballeros.txt",
|
"los_tres_caballeros.txt",
|
||||||
r#"
|
r#"
|
||||||
|
@ -158,7 +186,7 @@ fn from_csv_text_with_tab_separator_to_table() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn from_csv_text_skipping_headers_to_table() {
|
fn from_csv_text_skipping_headers_to_table() {
|
||||||
Playground::setup("filter_from_csv_test_4", |dirs, sandbox| {
|
Playground::setup("filter_from_csv_test_5", |dirs, sandbox| {
|
||||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||||
"los_tres_amigos.txt",
|
"los_tres_amigos.txt",
|
||||||
r#"
|
r#"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
use crate::hir;
|
use crate::hir;
|
||||||
use crate::hir::syntax_shape::{
|
use crate::hir::syntax_shape::{
|
||||||
expand_atom, expand_syntax, BareShape, ExpandContext, ExpandSyntax, ExpansionRule,
|
ExpandSyntax, expand_atom, expand_syntax, BareShape, ExpandContext, ExpandSyntax, ExpansionRule,
|
||||||
UnspannedAtomicToken, WhitespaceShape,
|
UnspannedAtomicToken, WhitespaceShape,
|
||||||
};
|
};
|
||||||
use crate::hir::tokens_iterator::TokensIterator;
|
use crate::hir::tokens_iterator::TokensIterator;
|
||||||
|
|
|
@ -477,18 +477,6 @@ impl ExpandSyntax for MemberShape {
|
||||||
return Ok(Member::Bare(bare.span()));
|
return Ok(Member::Bare(bare.span()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* KATZ */
|
|
||||||
/* let number = NumberShape.test(token_nodes, context);
|
|
||||||
|
|
||||||
if let Some(peeked) = number {
|
|
||||||
let node = peeked.not_eof("column")?.commit();
|
|
||||||
let (n, span) = node.as_number().ok_or_else(|| {
|
|
||||||
ParseError::internal_error("can't convert node to number".spanned(node.span()))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
return Ok(Member::Number(n, span))
|
|
||||||
}*/
|
|
||||||
|
|
||||||
let string = token_nodes.expand_syntax(StringShape);
|
let string = token_nodes.expand_syntax(StringShape);
|
||||||
|
|
||||||
if let Ok(syntax) = string {
|
if let Ok(syntax) = string {
|
||||||
|
|
|
@ -3,9 +3,6 @@ pub(crate) mod into_shapes;
|
||||||
pub(crate) mod pattern;
|
pub(crate) mod pattern;
|
||||||
pub(crate) mod state;
|
pub(crate) mod state;
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests;
|
|
||||||
|
|
||||||
use self::debug::ExpandTracer;
|
use self::debug::ExpandTracer;
|
||||||
use self::into_shapes::IntoShapes;
|
use self::into_shapes::IntoShapes;
|
||||||
use self::state::{Peeked, TokensIteratorState};
|
use self::state::{Peeked, TokensIteratorState};
|
||||||
|
@ -510,7 +507,7 @@ impl<'content> TokensIterator<'content> {
|
||||||
/// The purpose of `expand_infallible` is to clearly mark the infallible path through
|
/// The purpose of `expand_infallible` is to clearly mark the infallible path through
|
||||||
/// and entire list of tokens that produces a fully colored version of the source.
|
/// and entire list of tokens that produces a fully colored version of the source.
|
||||||
///
|
///
|
||||||
/// If the `ExpandSyntax` can poroduce a `Result`, make sure to use `expand_syntax`,
|
/// If the `ExpandSyntax` can produce a `Result`, make sure to use `expand_syntax`,
|
||||||
/// which will correctly show the error in the trace.
|
/// which will correctly show the error in the trace.
|
||||||
pub fn expand_infallible<U>(&mut self, shape: impl ExpandSyntax<Output = U>) -> U
|
pub fn expand_infallible<U>(&mut self, shape: impl ExpandSyntax<Output = U>) -> U
|
||||||
where
|
where
|
||||||
|
@ -536,7 +533,7 @@ impl<'content> TokensIterator<'content> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn expand<U>(&mut self, shape: impl ExpandSyntax<Output = U>) -> (U, usize)
|
pub fn expand<U>(&mut self, shape: impl ExpandSyntax<Output = U>) -> (U, usize)
|
||||||
where
|
where
|
||||||
U: std::fmt::Debug + Clone + 'static,
|
U: std::fmt::Debug + Clone + 'static,
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
use crate::hir::{syntax_shape::ExpandContext, syntax_shape::SignatureRegistry, TokensIterator};
|
|
||||||
use crate::parse::token_tree_builder::TokenTreeBuilder as b;
|
|
||||||
use nu_protocol::Signature;
|
|
||||||
use nu_source::{Span, Text};
|
|
||||||
|
|
||||||
use derive_new::new;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, new)]
|
|
||||||
struct TestRegistry {
|
|
||||||
#[new(default)]
|
|
||||||
signatures: indexmap::IndexMap<String, Signature>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TestRegistry {}
|
|
||||||
|
|
||||||
impl SignatureRegistry for TestRegistry {
|
|
||||||
fn has(&self, name: &str) -> bool {
|
|
||||||
self.signatures.contains_key(name)
|
|
||||||
}
|
|
||||||
fn get(&self, name: &str) -> Option<Signature> {
|
|
||||||
self.signatures.get(name).cloned()
|
|
||||||
}
|
|
||||||
fn clone_box(&self) -> Box<dyn SignatureRegistry> {
|
|
||||||
Box::new(self.clone())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn supplies_tokens() {
|
|
||||||
let token = b::it_var();
|
|
||||||
|
|
||||||
let (tokens, source) = b::build(token);
|
|
||||||
|
|
||||||
let tokens = vec![tokens];
|
|
||||||
let source = Text::from(&source);
|
|
||||||
|
|
||||||
let mut iterator = TokensIterator::new(
|
|
||||||
&tokens,
|
|
||||||
ExpandContext::new(Box::new(TestRegistry::new()), &source, None),
|
|
||||||
Span::unknown(),
|
|
||||||
);
|
|
||||||
|
|
||||||
let token = iterator.next().expect("Token expected.");
|
|
||||||
|
|
||||||
token.expect_var();
|
|
||||||
}
|
|
|
@ -6,6 +6,9 @@ pub mod hir;
|
||||||
pub mod parse;
|
pub mod parse;
|
||||||
pub mod parse_command;
|
pub mod parse_command;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub mod test_support;
|
||||||
|
|
||||||
pub use crate::commands::classified::{
|
pub use crate::commands::classified::{
|
||||||
external::ExternalCommand, internal::InternalCommand, ClassifiedCommand, ClassifiedPipeline,
|
external::ExternalCommand, internal::InternalCommand, ClassifiedCommand, ClassifiedPipeline,
|
||||||
};
|
};
|
||||||
|
@ -20,6 +23,11 @@ pub use crate::parse::parser::{module, pipeline};
|
||||||
pub use crate::parse::token_tree::{Delimiter, SpannedToken, Token};
|
pub use crate::parse::token_tree::{Delimiter, SpannedToken, Token};
|
||||||
pub use crate::parse::token_tree_builder::TokenTreeBuilder;
|
pub use crate::parse::token_tree_builder::TokenTreeBuilder;
|
||||||
|
|
||||||
|
pub mod utils {
|
||||||
|
pub use crate::parse::util::parse_line_with_separator;
|
||||||
|
pub use crate::parse::util::LineSeparatedShape;
|
||||||
|
}
|
||||||
|
|
||||||
use log::log_enabled;
|
use log::log_enabled;
|
||||||
use nu_errors::ShellError;
|
use nu_errors::ShellError;
|
||||||
use nu_protocol::{errln, outln};
|
use nu_protocol::{errln, outln};
|
||||||
|
|
|
@ -7,3 +7,49 @@ macro_rules! return_ok {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
macro_rules! equal_tokens {
|
||||||
|
($source:tt -> $tokens:expr) => {
|
||||||
|
let result = apply(pipeline, "pipeline", $source);
|
||||||
|
let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens);
|
||||||
|
|
||||||
|
if result != expected_tree {
|
||||||
|
let debug_result = format!("{}", result.debug($source));
|
||||||
|
let debug_expected = format!("{}", expected_tree.debug(&expected_source));
|
||||||
|
|
||||||
|
if debug_result == debug_expected {
|
||||||
|
assert_eq!(
|
||||||
|
result, expected_tree,
|
||||||
|
"NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}",
|
||||||
|
$source,
|
||||||
|
debug_expected
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
assert_eq!(debug_result, debug_expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
(<$parser:tt> $source:tt -> $tokens:expr) => {
|
||||||
|
let result = apply($parser, stringify!($parser), $source);
|
||||||
|
|
||||||
|
let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens);
|
||||||
|
|
||||||
|
if result != expected_tree {
|
||||||
|
let debug_result = format!("{}", result.debug($source));
|
||||||
|
let debug_expected = format!("{}", expected_tree.debug(&expected_source));
|
||||||
|
|
||||||
|
if debug_result == debug_expected {
|
||||||
|
assert_eq!(
|
||||||
|
result, expected_tree,
|
||||||
|
"NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}",
|
||||||
|
$source,
|
||||||
|
debug_expected
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
assert_eq!(debug_result, debug_expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
#![allow(unused)]
|
#![allow(unused)]
|
||||||
|
|
||||||
use crate::parse::{
|
use crate::parse::{
|
||||||
call_node::*, flag::*, number::*, operator::*, pipeline::*, token_tree::*,
|
call_node::*, flag::*, number::*, operator::*, pipeline::*, token_tree::*,
|
||||||
token_tree_builder::*, unit::*,
|
token_tree_builder::*, unit::*,
|
||||||
|
@ -318,6 +317,7 @@ pub fn dq_string(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
|
||||||
let (input, _) = char('"')(input)?;
|
let (input, _) = char('"')(input)?;
|
||||||
let start1 = input.offset;
|
let start1 = input.offset;
|
||||||
let (input, _) = many0(none_of("\""))(input)?;
|
let (input, _) = many0(none_of("\""))(input)?;
|
||||||
|
|
||||||
let end1 = input.offset;
|
let end1 = input.offset;
|
||||||
let (input, _) = char('"')(input)?;
|
let (input, _) = char('"')(input)?;
|
||||||
let end = input.offset;
|
let end = input.offset;
|
||||||
|
@ -939,7 +939,7 @@ pub fn tight_node(input: NomSpan) -> IResult<NomSpan, Vec<SpannedToken>> {
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn to_list(
|
pub fn to_list(
|
||||||
parser: impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken>,
|
parser: impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken>,
|
||||||
) -> impl Fn(NomSpan) -> IResult<NomSpan, Vec<SpannedToken>> {
|
) -> impl Fn(NomSpan) -> IResult<NomSpan, Vec<SpannedToken>> {
|
||||||
move |input| {
|
move |input| {
|
||||||
|
@ -1017,7 +1017,7 @@ fn parse_int<T>(frag: &str, neg: Option<T>) -> i64 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_boundary(c: Option<char>) -> bool {
|
pub fn is_boundary(c: Option<char>) -> bool {
|
||||||
match c {
|
match c {
|
||||||
None => true,
|
None => true,
|
||||||
Some(')') | Some(']') | Some('}') | Some('(') => true,
|
Some(')') | Some(']') | Some('}') | Some('(') => true,
|
||||||
|
@ -1140,59 +1140,13 @@ fn is_member_start(c: char) -> bool {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use crate::parse::parser::{module, nodes, pipeline};
|
||||||
use crate::parse::token_tree_builder::TokenTreeBuilder as b;
|
use crate::parse::token_tree_builder::TokenTreeBuilder::{self, self as b};
|
||||||
use crate::parse::token_tree_builder::{CurriedToken, TokenTreeBuilder};
|
use crate::test_support::apply;
|
||||||
|
use nu_source::PrettyDebugWithSource;
|
||||||
|
|
||||||
use pretty_assertions::assert_eq;
|
use pretty_assertions::assert_eq;
|
||||||
|
|
||||||
pub type CurriedNode<T> = Box<dyn FnOnce(&mut TokenTreeBuilder) -> T + 'static>;
|
|
||||||
|
|
||||||
macro_rules! equal_tokens {
|
|
||||||
($source:tt -> $tokens:expr) => {
|
|
||||||
let result = apply(pipeline, "pipeline", $source);
|
|
||||||
let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens);
|
|
||||||
|
|
||||||
if result != expected_tree {
|
|
||||||
let debug_result = format!("{}", result.debug($source));
|
|
||||||
let debug_expected = format!("{}", expected_tree.debug(&expected_source));
|
|
||||||
|
|
||||||
if debug_result == debug_expected {
|
|
||||||
assert_eq!(
|
|
||||||
result, expected_tree,
|
|
||||||
"NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}",
|
|
||||||
$source,
|
|
||||||
debug_expected
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
assert_eq!(debug_result, debug_expected)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
(<$parser:tt> $source:tt -> $tokens:expr) => {
|
|
||||||
let result = apply($parser, stringify!($parser), $source);
|
|
||||||
|
|
||||||
let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens);
|
|
||||||
|
|
||||||
if result != expected_tree {
|
|
||||||
let debug_result = format!("{}", result.debug($source));
|
|
||||||
let debug_expected = format!("{}", expected_tree.debug(&expected_source));
|
|
||||||
|
|
||||||
if debug_result == debug_expected {
|
|
||||||
assert_eq!(
|
|
||||||
result, expected_tree,
|
|
||||||
"NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}",
|
|
||||||
$source,
|
|
||||||
debug_expected
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
assert_eq!(debug_result, debug_expected)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_integer() {
|
fn test_integer() {
|
||||||
equal_tokens! {
|
equal_tokens! {
|
||||||
|
@ -1339,7 +1293,7 @@ mod tests {
|
||||||
fn test_flag() {
|
fn test_flag() {
|
||||||
equal_tokens! {
|
equal_tokens! {
|
||||||
<nodes>
|
<nodes>
|
||||||
"--amigos" -> b::token_list(vec![b::flag("arepas")])
|
"--amigos" -> b::token_list(vec![b::flag("amigos")])
|
||||||
}
|
}
|
||||||
|
|
||||||
equal_tokens! {
|
equal_tokens! {
|
||||||
|
@ -1721,119 +1675,4 @@ mod tests {
|
||||||
])
|
])
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// #[test]
|
|
||||||
// fn test_smoke_pipeline() {
|
|
||||||
// let _ = pretty_env_logger::try_init();
|
|
||||||
|
|
||||||
// assert_eq!(
|
|
||||||
// apply(
|
|
||||||
// pipeline,
|
|
||||||
// "pipeline",
|
|
||||||
// r#"git branch --merged | split-row "`n" | where $it != "* master""#
|
|
||||||
// ),
|
|
||||||
// build_token(b::pipeline(vec![
|
|
||||||
// (
|
|
||||||
// None,
|
|
||||||
// b::call(
|
|
||||||
// b::bare("git"),
|
|
||||||
// vec![b::sp(), b::bare("branch"), b::sp(), b::flag("merged")]
|
|
||||||
// ),
|
|
||||||
// Some(" ")
|
|
||||||
// ),
|
|
||||||
// (
|
|
||||||
// Some(" "),
|
|
||||||
// b::call(b::bare("split-row"), vec![b::sp(), b::string("`n")]),
|
|
||||||
// Some(" ")
|
|
||||||
// ),
|
|
||||||
// (
|
|
||||||
// Some(" "),
|
|
||||||
// b::call(
|
|
||||||
// b::bare("where"),
|
|
||||||
// vec![
|
|
||||||
// b::sp(),
|
|
||||||
// b::it_var(),
|
|
||||||
// b::sp(),
|
|
||||||
// b::op("!="),
|
|
||||||
// b::sp(),
|
|
||||||
// b::string("* master")
|
|
||||||
// ]
|
|
||||||
// ),
|
|
||||||
// None
|
|
||||||
// )
|
|
||||||
// ]))
|
|
||||||
// );
|
|
||||||
|
|
||||||
// assert_eq!(
|
|
||||||
// apply(pipeline, "pipeline", "ls | where { $it.size > 100 }"),
|
|
||||||
// build_token(b::pipeline(vec![
|
|
||||||
// (None, b::call(b::bare("ls"), vec![]), Some(" ")),
|
|
||||||
// (
|
|
||||||
// Some(" "),
|
|
||||||
// b::call(
|
|
||||||
// b::bare("where"),
|
|
||||||
// vec![
|
|
||||||
// b::sp(),
|
|
||||||
// b::braced(vec![
|
|
||||||
// b::path(b::it_var(), vec![b::member("size")]),
|
|
||||||
// b::sp(),
|
|
||||||
// b::op(">"),
|
|
||||||
// b::sp(),
|
|
||||||
// b::int(100)
|
|
||||||
// ])
|
|
||||||
// ]
|
|
||||||
// ),
|
|
||||||
// None
|
|
||||||
// )
|
|
||||||
// ]))
|
|
||||||
// )
|
|
||||||
// }
|
|
||||||
|
|
||||||
fn apply(
|
|
||||||
f: impl Fn(
|
|
||||||
NomSpan,
|
|
||||||
)
|
|
||||||
-> Result<(NomSpan, SpannedToken), nom::Err<(NomSpan, nom::error::ErrorKind)>>,
|
|
||||||
desc: &str,
|
|
||||||
string: &str,
|
|
||||||
) -> SpannedToken {
|
|
||||||
let result = f(nom_input(string));
|
|
||||||
|
|
||||||
match result {
|
|
||||||
Ok(value) => value.1,
|
|
||||||
Err(err) => {
|
|
||||||
let err = nu_errors::ShellError::parse_error(err);
|
|
||||||
|
|
||||||
println!("{:?}", string);
|
|
||||||
crate::hir::baseline_parse::tests::print_err(err, &nu_source::Text::from(string));
|
|
||||||
panic!("test failed")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn span((left, right): (usize, usize)) -> Span {
|
|
||||||
Span::new(left, right)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn delimited(
|
|
||||||
delimiter: Spanned<Delimiter>,
|
|
||||||
children: Vec<SpannedToken>,
|
|
||||||
left: usize,
|
|
||||||
right: usize,
|
|
||||||
) -> SpannedToken {
|
|
||||||
let start = Span::for_char(left);
|
|
||||||
let end = Span::for_char(right);
|
|
||||||
|
|
||||||
let node = DelimitedNode::new(delimiter.item, (start, end), children);
|
|
||||||
Token::Delimited(node).into_spanned((left, right))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build<T>(block: CurriedNode<T>) -> T {
|
|
||||||
let mut builder = TokenTreeBuilder::new();
|
|
||||||
block(&mut builder)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn build_token(block: CurriedToken) -> SpannedToken {
|
|
||||||
TokenTreeBuilder::build(block).0
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -306,6 +306,13 @@ impl SpannedToken {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_int(&self) -> bool {
|
||||||
|
match self.unspanned() {
|
||||||
|
Token::Number(RawNumber::Int(_)) => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn as_string(&self) -> Option<(Span, Span)> {
|
pub fn as_string(&self) -> Option<(Span, Span)> {
|
||||||
match self.unspanned() {
|
match self.unspanned() {
|
||||||
Token::String(inner_span) => Some((self.span(), *inner_span)),
|
Token::String(inner_span) => Some((self.span(), *inner_span)),
|
||||||
|
@ -327,16 +334,16 @@ impl SpannedToken {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_int(&self) -> bool {
|
pub fn is_dot(&self) -> bool {
|
||||||
match self.unspanned() {
|
match self.unspanned() {
|
||||||
Token::Number(RawNumber::Int(_)) => true,
|
Token::EvaluationOperator(EvaluationOperator::Dot) => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_dot(&self) -> bool {
|
pub fn is_separator(&self) -> bool {
|
||||||
match self.unspanned() {
|
match self.unspanned() {
|
||||||
Token::EvaluationOperator(EvaluationOperator::Dot) => true,
|
Token::Separator => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -479,6 +486,13 @@ impl SpannedToken {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn expect_number(&self) -> RawNumber {
|
||||||
|
match self.unspanned() {
|
||||||
|
Token::Number(raw_number) => *raw_number,
|
||||||
|
other => panic!("Expected number, found {:?}", other),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn expect_string(&self) -> (Span, Span) {
|
pub fn expect_string(&self) -> (Span, Span) {
|
||||||
match self.unspanned() {
|
match self.unspanned() {
|
||||||
Token::String(inner_span) => (self.span(), *inner_span),
|
Token::String(inner_span) => (self.span(), *inner_span),
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
|
|
2
crates/nu-parser/src/parse/util/line_delimited_parser.rs
Normal file
2
crates/nu-parser/src/parse/util/line_delimited_parser.rs
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
pub(crate) mod parser;
|
||||||
|
pub(crate) mod shape;
|
272
crates/nu-parser/src/parse/util/line_delimited_parser/parser.rs
Normal file
272
crates/nu-parser/src/parse/util/line_delimited_parser/parser.rs
Normal file
|
@ -0,0 +1,272 @@
|
||||||
|
use crate::parse::number::RawNumber;
|
||||||
|
use crate::parse::parser::{is_boundary, to_list};
|
||||||
|
use crate::parse::token_tree::SpannedToken;
|
||||||
|
use crate::parse::token_tree_builder::TokenTreeBuilder;
|
||||||
|
use nu_source::{HasSpan, NomSpan, Span, Spanned, SpannedItem};
|
||||||
|
|
||||||
|
use nom::branch::alt;
|
||||||
|
use nom::bytes::complete::{escaped, tag};
|
||||||
|
use nom::character::complete::*;
|
||||||
|
use nom::combinator::*;
|
||||||
|
use nom::multi::*;
|
||||||
|
use nom::IResult;
|
||||||
|
use nom_tracable::tracable_parser;
|
||||||
|
|
||||||
|
#[tracable_parser]
|
||||||
|
pub fn parse_line_with_separator<'a, 'b>(
|
||||||
|
separator: &'b str,
|
||||||
|
input: NomSpan<'a>,
|
||||||
|
) -> IResult<NomSpan<'a>, Spanned<Vec<SpannedToken>>> {
|
||||||
|
let start = input.offset;
|
||||||
|
let mut nodes = vec![];
|
||||||
|
let mut next_input = input;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let node_result = to_list(leaf(separator))(next_input);
|
||||||
|
|
||||||
|
let (after_node_input, next_nodes) = match node_result {
|
||||||
|
Err(_) => break,
|
||||||
|
Ok((after_node_input, next_node)) => (after_node_input, next_node),
|
||||||
|
};
|
||||||
|
|
||||||
|
nodes.extend(next_nodes);
|
||||||
|
|
||||||
|
match separated_by(separator)(after_node_input) {
|
||||||
|
Err(_) => {
|
||||||
|
next_input = after_node_input;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Ok((input, s)) => {
|
||||||
|
nodes.push(s);
|
||||||
|
next_input = input;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let end = next_input.offset;
|
||||||
|
|
||||||
|
Ok((next_input, nodes.spanned(Span::new(start, end))))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracable_parser]
|
||||||
|
pub fn fallback_number_without(c: char) -> impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken> {
|
||||||
|
move |input| {
|
||||||
|
let (input, number) = fallback_raw_number_without(c)(input)?;
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
TokenTreeBuilder::spanned_number(number, number.span()),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracable_parser]
|
||||||
|
pub fn fallback_raw_number_without(c: char) -> impl Fn(NomSpan) -> IResult<NomSpan, RawNumber> {
|
||||||
|
move |input| {
|
||||||
|
let _anchoral = input;
|
||||||
|
let start = input.offset;
|
||||||
|
let (input, _neg) = opt(tag("-"))(input)?;
|
||||||
|
let (input, _head) = digit1(input)?;
|
||||||
|
let after_int_head = input;
|
||||||
|
|
||||||
|
match input.fragment.chars().next() {
|
||||||
|
None => return Ok((input, RawNumber::int(Span::new(start, input.offset)))),
|
||||||
|
Some('.') => (),
|
||||||
|
other if is_boundary(other) || other == Some(c) => {
|
||||||
|
return Ok((input, RawNumber::int(Span::new(start, input.offset))))
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
return Err(nom::Err::Error(nom::error::make_error(
|
||||||
|
input,
|
||||||
|
nom::error::ErrorKind::Tag,
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let dot: IResult<NomSpan, NomSpan, (NomSpan, nom::error::ErrorKind)> = tag(".")(input);
|
||||||
|
|
||||||
|
let input = match dot {
|
||||||
|
Ok((input, _dot)) => input,
|
||||||
|
|
||||||
|
// it's just an integer
|
||||||
|
Err(_) => return Ok((input, RawNumber::int(Span::new(start, input.offset)))),
|
||||||
|
};
|
||||||
|
|
||||||
|
let tail_digits_result: IResult<NomSpan, _> = digit1(input);
|
||||||
|
|
||||||
|
let (input, _tail) = match tail_digits_result {
|
||||||
|
Ok((input, tail)) => (input, tail),
|
||||||
|
Err(_) => {
|
||||||
|
return Ok((
|
||||||
|
after_int_head,
|
||||||
|
RawNumber::int((start, after_int_head.offset)),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let end = input.offset;
|
||||||
|
|
||||||
|
let next = input.fragment.chars().next();
|
||||||
|
|
||||||
|
if is_boundary(next) || next == Some(c) {
|
||||||
|
Ok((input, RawNumber::decimal(Span::new(start, end))))
|
||||||
|
} else {
|
||||||
|
Err(nom::Err::Error(nom::error::make_error(
|
||||||
|
input,
|
||||||
|
nom::error::ErrorKind::Tag,
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracable_parser]
|
||||||
|
pub fn leaf(c: &str) -> impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken> + '_ {
|
||||||
|
move |input| {
|
||||||
|
let separator = c.chars().next().unwrap_or_else(|| ',');
|
||||||
|
|
||||||
|
let (input, node) = alt((
|
||||||
|
fallback_number_without(separator),
|
||||||
|
string,
|
||||||
|
fallback_string_without(c),
|
||||||
|
))(input)?;
|
||||||
|
|
||||||
|
Ok((input, node))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracable_parser]
|
||||||
|
pub fn separated_by(c: &str) -> impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken> + '_ {
|
||||||
|
move |input| {
|
||||||
|
let left = input.offset;
|
||||||
|
let (input, _) = tag(c)(input)?;
|
||||||
|
let right = input.offset;
|
||||||
|
|
||||||
|
Ok((input, TokenTreeBuilder::spanned_sep(Span::new(left, right))))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracable_parser]
|
||||||
|
pub fn dq_string(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
|
||||||
|
let start = input.offset;
|
||||||
|
let (input, _) = char('"')(input)?;
|
||||||
|
let start1 = input.offset;
|
||||||
|
let (input, _) = escaped(
|
||||||
|
none_of(r#"\""#),
|
||||||
|
'\\',
|
||||||
|
nom::character::complete::one_of(r#"\"rnt"#),
|
||||||
|
)(input)?;
|
||||||
|
|
||||||
|
let end1 = input.offset;
|
||||||
|
let (input, _) = char('"')(input)?;
|
||||||
|
let end = input.offset;
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
TokenTreeBuilder::spanned_string(Span::new(start1, end1), Span::new(start, end)),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracable_parser]
|
||||||
|
pub fn sq_string(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
|
||||||
|
let start = input.offset;
|
||||||
|
let (input, _) = char('\'')(input)?;
|
||||||
|
let start1 = input.offset;
|
||||||
|
let (input, _) = many0(none_of("\'"))(input)?;
|
||||||
|
let end1 = input.offset;
|
||||||
|
let (input, _) = char('\'')(input)?;
|
||||||
|
let end = input.offset;
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
TokenTreeBuilder::spanned_string(Span::new(start1, end1), Span::new(start, end)),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracable_parser]
|
||||||
|
pub fn string(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
|
||||||
|
alt((sq_string, dq_string))(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracable_parser]
|
||||||
|
pub fn fallback_string_without(c: &str) -> impl Fn(NomSpan) -> IResult<NomSpan, SpannedToken> + '_ {
|
||||||
|
move |input| {
|
||||||
|
let start = input.offset;
|
||||||
|
let (input, _) = many0(none_of(c))(input)?;
|
||||||
|
let end = input.offset;
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
TokenTreeBuilder::spanned_string(Span::new(start, end), Span::new(start, end)),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use crate::parse::token_tree_builder::TokenTreeBuilder::{self, self as b};
    use crate::parse::util::parse_line_with_separator;
    use crate::test_support::apply;
    use nom::IResult;

    use crate::parse::pipeline::PipelineElement;
    use crate::parse::token_tree::SpannedToken;
    use nu_source::NomSpan;
    use nu_source::PrettyDebugWithSource;

    use pretty_assertions::assert_eq;

    /// Parses a comma-separated line and wraps the resulting tokens in a
    /// single-element pipeline so it can be compared through `equal_tokens!`.
    pub fn nodes(input: NomSpan) -> IResult<NomSpan, SpannedToken> {
        let (rest, tokens) = parse_line_with_separator(",", input)?;
        let line_span = tokens.span;
        let elements = vec![PipelineElement::new(None, tokens)];

        Ok((rest, TokenTreeBuilder::spanned_pipeline(elements, line_span)))
    }

    // Fields and the separators between them all end up in the token list.
    #[test]
    fn separators() {
        equal_tokens! {
            <nodes>
            r#""name","lastname","age""# -> b::token_list(vec![
                b::string("name"),
                b::sep(","),
                b::string("lastname"),
                b::sep(","),
                b::string("age")
            ])
        }

        equal_tokens! {
            <nodes>
            r#""Andrés","Robalino",12"# -> b::token_list(vec![
                b::string("Andrés"),
                b::sep(","),
                b::string("Robalino"),
                b::sep(","),
                b::int(12)
            ])
        }
    }

    // A lone double-quoted field.
    #[test]
    fn strings() {
        equal_tokens! {
            <nodes>
            r#""andres""# -> b::token_list(vec![b::string("andres")])
        }
    }

    // Positive and negative integers.
    #[test]
    fn numbers() {
        equal_tokens! {
            <nodes>
            "123" -> b::token_list(vec![b::int(123)])
        }

        equal_tokens! {
            <nodes>
            "-123" -> b::token_list(vec![b::int(-123)])
        }
    }
}
|
|
@ -0,0 +1,91 @@
|
||||||
|
use crate::hir::{
|
||||||
|
self, syntax_shape::ExpandSyntax, syntax_shape::FlatShape, syntax_shape::NumberExpressionShape,
|
||||||
|
syntax_shape::StringShape,
|
||||||
|
};
|
||||||
|
use crate::hir::{Expression, TokensIterator};
|
||||||
|
use crate::parse::token_tree::SeparatorType;
|
||||||
|
|
||||||
|
use nu_errors::ParseError;
|
||||||
|
use nu_protocol::UntaggedValue;
|
||||||
|
use nu_source::Span;
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
pub struct LineSeparatedShape;
|
||||||
|
|
||||||
|
impl ExpandSyntax for LineSeparatedShape {
|
||||||
|
type Output = Result<Vec<UntaggedValue>, ParseError>;
|
||||||
|
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
"any string line separated by"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn expand<'a, 'b>(
|
||||||
|
&self,
|
||||||
|
token_nodes: &mut TokensIterator<'_>,
|
||||||
|
) -> Result<Vec<UntaggedValue>, ParseError> {
|
||||||
|
let source = token_nodes.source();
|
||||||
|
|
||||||
|
if token_nodes.at_end() {
|
||||||
|
return Ok(vec![]);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut entries = vec![];
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let field = {
|
||||||
|
token_nodes
|
||||||
|
.expand_syntax(NumberExpressionShape)
|
||||||
|
.or_else(|_| {
|
||||||
|
token_nodes
|
||||||
|
.expand_syntax(StringShape)
|
||||||
|
.map(|syntax| Expression::string(syntax.inner).into_expr(syntax.span))
|
||||||
|
})
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Ok(field) = field {
|
||||||
|
match &field.expr {
|
||||||
|
Expression::Literal(hir::Literal::Number(crate::Number::Int(i))) => {
|
||||||
|
entries.push(UntaggedValue::int(i.clone()))
|
||||||
|
}
|
||||||
|
Expression::Literal(hir::Literal::Number(crate::Number::Decimal(d))) => {
|
||||||
|
entries.push(UntaggedValue::decimal(d.clone()))
|
||||||
|
}
|
||||||
|
Expression::Literal(hir::Literal::String(span)) => {
|
||||||
|
if span.is_closed() {
|
||||||
|
entries.push(UntaggedValue::nothing())
|
||||||
|
} else {
|
||||||
|
entries.push(UntaggedValue::string(span.slice(&source)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
match token_nodes.expand_infallible(SeparatorShape) {
|
||||||
|
Err(err) if !token_nodes.at_end() => return Err(err),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
if token_nodes.at_end() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
pub struct SeparatorShape;
|
||||||
|
|
||||||
|
impl ExpandSyntax for SeparatorShape {
|
||||||
|
type Output = Result<Span, ParseError>;
|
||||||
|
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
"separated"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn expand<'a, 'b>(&self, token_nodes: &'b mut TokensIterator<'a>) -> Result<Span, ParseError> {
|
||||||
|
token_nodes.expand_token(SeparatorType, |span| Ok((FlatShape::Separator, span)))
|
||||||
|
}
|
||||||
|
}
|
4
crates/nu-parser/src/parse/util/mod.rs
Normal file
4
crates/nu-parser/src/parse/util/mod.rs
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
mod line_delimited_parser;
|
||||||
|
|
||||||
|
pub use line_delimited_parser::parser::parse_line_with_separator;
|
||||||
|
pub use line_delimited_parser::shape::LineSeparatedShape;
|
104
crates/nu-parser/src/test_support/mod.rs
Normal file
104
crates/nu-parser/src/test_support/mod.rs
Normal file
|
@ -0,0 +1,104 @@
|
||||||
|
use crate::hir::{syntax_shape::ExpandContext, syntax_shape::SignatureRegistry};
|
||||||
|
|
||||||
|
use crate::parse::files::Files;
|
||||||
|
use crate::parse::token_tree::{DelimitedNode, Delimiter, SpannedToken, Token};
|
||||||
|
use crate::parse::token_tree_builder::{CurriedToken, TokenTreeBuilder};
|
||||||
|
|
||||||
|
use nu_errors::ShellError;
|
||||||
|
use nu_protocol::Signature;
|
||||||
|
use nu_source::{nom_input, NomSpan, Span, Spanned, Text};
|
||||||
|
|
||||||
|
pub use nu_source::PrettyDebug;
|
||||||
|
|
||||||
|
use derive_new::new;
|
||||||
|
|
||||||
|
/// A boxed, one-shot closure that is handed a mutable `TokenTreeBuilder` and
/// produces a `T` (see `build`, which runs it against a fresh builder).
pub type CurriedNode<T> = Box<dyn FnOnce(&mut TokenTreeBuilder) -> T + 'static>;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, new)]
|
||||||
|
pub struct TestRegistry {
|
||||||
|
#[new(default)]
|
||||||
|
signatures: indexmap::IndexMap<String, Signature>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TestRegistry {}
|
||||||
|
|
||||||
|
impl SignatureRegistry for TestRegistry {
|
||||||
|
fn has(&self, name: &str) -> bool {
|
||||||
|
self.signatures.contains_key(name)
|
||||||
|
}
|
||||||
|
fn get(&self, name: &str) -> Option<Signature> {
|
||||||
|
self.signatures.get(name).cloned()
|
||||||
|
}
|
||||||
|
fn clone_box(&self) -> Box<dyn SignatureRegistry> {
|
||||||
|
Box::new(self.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_empty_context(source: &Text, callback: impl FnOnce(ExpandContext)) {
|
||||||
|
let registry = TestRegistry::new();
|
||||||
|
callback(ExpandContext::new(Box::new(registry), source, None))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn inner_string_span(span: Span) -> Span {
|
||||||
|
Span::new(span.start() + 1, span.end() - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn print_err(err: ShellError, source: &Text) {
|
||||||
|
let diag = err.into_diagnostic();
|
||||||
|
|
||||||
|
let writer = termcolor::StandardStream::stderr(termcolor::ColorChoice::Auto);
|
||||||
|
let mut source = source.to_string();
|
||||||
|
source.push_str(" ");
|
||||||
|
let files = Files::new(source);
|
||||||
|
let _ = language_reporting::emit(
|
||||||
|
&mut writer.lock(),
|
||||||
|
&files,
|
||||||
|
&diag,
|
||||||
|
&language_reporting::DefaultConfig,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn apply(
|
||||||
|
f: impl Fn(NomSpan) -> Result<(NomSpan, SpannedToken), nom::Err<(NomSpan, nom::error::ErrorKind)>>,
|
||||||
|
_desc: &str,
|
||||||
|
string: &str,
|
||||||
|
) -> SpannedToken {
|
||||||
|
let result = f(nom_input(string));
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(value) => value.1,
|
||||||
|
Err(err) => {
|
||||||
|
let err = nu_errors::ShellError::parse_error(err);
|
||||||
|
|
||||||
|
println!("{:?}", string);
|
||||||
|
crate::hir::baseline_parse::tests::print_err(err, &nu_source::Text::from(string));
|
||||||
|
panic!("test failed")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn span((left, right): (usize, usize)) -> Span {
|
||||||
|
Span::new(left, right)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn delimited(
|
||||||
|
delimiter: Spanned<Delimiter>,
|
||||||
|
children: Vec<SpannedToken>,
|
||||||
|
left: usize,
|
||||||
|
right: usize,
|
||||||
|
) -> SpannedToken {
|
||||||
|
let start = Span::for_char(left);
|
||||||
|
let end = Span::for_char(right);
|
||||||
|
|
||||||
|
let node = DelimitedNode::new(delimiter.item, (start, end), children);
|
||||||
|
Token::Delimited(node).into_spanned((left, right))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build<T>(block: CurriedNode<T>) -> T {
|
||||||
|
let mut builder = TokenTreeBuilder::new();
|
||||||
|
block(&mut builder)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build_token(block: CurriedToken) -> SpannedToken {
|
||||||
|
TokenTreeBuilder::build(block).0
|
||||||
|
}
|
|
@ -659,6 +659,27 @@ impl Span {
|
||||||
self.start == 0 && self.end == 0
|
self.start == 0 && self.end == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a bool if the current Span does not cover.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// // make clean
|
||||||
|
/// // ----
|
||||||
|
/// // (0,4)
|
||||||
|
/// //
|
||||||
|
/// // ^(5,5)
|
||||||
|
///
|
||||||
|
/// let make_span = Span::new(0,4);
|
||||||
|
/// let clean_span = Span::new(5,5);
|
||||||
|
///
|
||||||
|
/// assert_eq!(make_span.is_closed(), false);
|
||||||
|
/// assert_eq!(clean_span.is_closed(), true);
|
||||||
|
/// ```
|
||||||
|
pub fn is_closed(&self) -> bool {
|
||||||
|
self.start == self.end
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns a slice of the input that covers the start and end of the current Span.
|
/// Returns a slice of the input that covers the start and end of the current Span.
|
||||||
pub fn slice<'a>(&self, source: &'a str) -> &'a str {
|
pub fn slice<'a>(&self, source: &'a str) -> &'a str {
|
||||||
&source[self.start..self.end]
|
&source[self.start..self.end]
|
||||||
|
|
|
@ -78,16 +78,17 @@ fn converts_to_int() {
|
||||||
let actual = nu!(
|
let actual = nu!(
|
||||||
cwd: "tests/fixtures/formats", pipeline(
|
cwd: "tests/fixtures/formats", pipeline(
|
||||||
r#"
|
r#"
|
||||||
open caco3_plastics.csv
|
echo '{number_as_string: "1"}'
|
||||||
| first 1
|
| from-json
|
||||||
| str tariff_item --to-int
|
| str number_as_string --to-int
|
||||||
| where tariff_item == 2509000000
|
| rename number
|
||||||
| get tariff_item
|
| where number == 1
|
||||||
|
| get number
|
||||||
| echo $it
|
| echo $it
|
||||||
"#
|
"#
|
||||||
));
|
));
|
||||||
|
|
||||||
assert_eq!(actual, "2509000000");
|
assert_eq!(actual, "1");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in a new issue