From 8cf9bc99938e0b3538d90cf6b2d2c46258887750 Mon Sep 17 00:00:00 2001 From: mike <98623181+1Kinoti@users.noreply.github.com> Date: Fri, 24 Mar 2023 14:54:06 +0300 Subject: [PATCH] allow lists to have type annotations (#8529) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this pr refines #8270 and closes #8109 # description examples: the original syntax is okay ```nu def okay [nums: list] {} # the type of list will be list ``` empty annotations are allowed in any variation the last two may be caught by a future formatter, but do not affect `nu` code currently ```nu def okay [nums: list<>] {} # okay def okay [nums: list< >] {} # weird but also okay def okay [nums: list< >] {} # also weird but okay ``` types are allowed (See [notes](#notes) below) ```nu def okay [nums: list<int>] {} # `test [a b c]` will throw an error def okay [nums: list< int >] {} # any amount of space within the angle brackets is okay def err [nums: list <int>] {} # this is not okay, `nums` and `<int>` will be parsed as # two separate params, ``` nested annotations are allowed in many variations ```nu def okay [items: list>] {} def okay [items: list] {} ``` any unterminated annotation is caught ```nu Error: nu::parser::unexpected_eof × Unexpected end of code. ╭─[source:1:1] 1 │ def err [nums: list ╰──── ``` unknown types are flagged ```nu Error: nu::parser::unknown_type × Unknown type. ╭─[source:1:1] 1 │ def err [nums: list] {} · ─┬─ · ╰── unknown type ╰──── Error: nu::parser::unknown_type × Unknown type. ╭─[source:1:1] 1 │ def err [nums: list] {} · ─────┬───── · ╰── unknown type ╰──── ``` # notes the error message for mismatched types is not as intuitive ```nu Error: nu::parser::parse_mismatch × Parse mismatch during operation. ╭─[source:1:1] 1 │ def err [nums: list<int>] {}; err [a b c] · ┬ · ╰── expected int ╰──── ``` it should be something like this ```nu Error: nu::parser::parse_mismatch × Parse mismatch during operation. 
╭─[source:1:1] 1 │ def err [nums: list] {}; err [a b c] · ──┬── · ╰── expected list ╰──── ``` this is currently not implemented --- crates/nu-command/tests/commands/def.rs | 12 --- crates/nu-parser/src/lex.rs | 46 ++++++++++ crates/nu-parser/src/lib.rs | 2 +- crates/nu-parser/src/parser.rs | 59 ++++++++++++- crates/nu-parser/tests/test_lex.rs | 96 +++++++++++++++++++- src/tests.rs | 1 + src/tests/test_signatures.rs | 113 ++++++++++++++++++++++++ 7 files changed, 311 insertions(+), 18 deletions(-) create mode 100644 src/tests/test_signatures.rs diff --git a/crates/nu-command/tests/commands/def.rs b/crates/nu-command/tests/commands/def.rs index ffa1493553..3537041f34 100644 --- a/crates/nu-command/tests/commands/def.rs +++ b/crates/nu-command/tests/commands/def.rs @@ -150,18 +150,6 @@ fn def_fails_with_invalid_name() { assert!(actual.err.contains(err_msg)); } -#[test] -fn def_errors_with_specified_list_type() { - let actual = nu!( - cwd: ".", pipeline( - r#" - def test-command [ foo: list ] {} - "# - )); - - assert!(actual.err.contains("unknown type")); -} - #[test] fn def_with_list() { Playground::setup("def_with_list", |dirs, _| { diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs index dd0e58aaa2..8d51687a73 100644 --- a/crates/nu-parser/src/lex.rs +++ b/crates/nu-parser/src/lex.rs @@ -31,6 +31,7 @@ pub enum BlockKind { Paren, CurlyBracket, SquareBracket, + AngleBracket, } impl BlockKind { @@ -39,6 +40,7 @@ impl BlockKind { BlockKind::Paren => b')', BlockKind::SquareBracket => b']', BlockKind::CurlyBracket => b'}', + BlockKind::AngleBracket => b'>', } } } @@ -77,6 +79,7 @@ pub fn lex_item( span_offset: usize, additional_whitespace: &[u8], special_tokens: &[u8], + in_signature: bool, ) -> (Token, Option) { // This variable tracks the starting character of a string literal, so that // we remain inside the string literal lexer mode until we encounter the @@ -156,6 +159,12 @@ pub fn lex_item( } else if c == b'[' { // We encountered an opening `[` 
delimiter. block_level.push(BlockKind::SquareBracket); + } else if c == b'<' && in_signature { + block_level.push(BlockKind::AngleBracket); + } else if c == b'>' && in_signature { + if let Some(BlockKind::AngleBracket) = block_level.last() { + let _ = block_level.pop(); + } } else if c == b']' { // We encountered a closing `]` delimiter. Pop off the opening `[` // delimiter. @@ -299,12 +308,48 @@ pub fn lex_item( } } +pub fn lex_signature( + input: &[u8], + span_offset: usize, + additional_whitespace: &[u8], + special_tokens: &[u8], + skip_comment: bool, +) -> (Vec, Option) { + lex_internal( + input, + span_offset, + additional_whitespace, + special_tokens, + skip_comment, + true, + ) +} + pub fn lex( input: &[u8], span_offset: usize, additional_whitespace: &[u8], special_tokens: &[u8], skip_comment: bool, +) -> (Vec, Option) { + lex_internal( + input, + span_offset, + additional_whitespace, + special_tokens, + skip_comment, + false, + ) +} + +fn lex_internal( + input: &[u8], + span_offset: usize, + additional_whitespace: &[u8], + special_tokens: &[u8], + skip_comment: bool, + // within signatures we want to treat `<` and `>` specially + in_signature: bool, ) -> (Vec, Option) { let mut error = None; @@ -427,6 +472,7 @@ pub fn lex( span_offset, additional_whitespace, special_tokens, + in_signature, ); if error.is_none() { error = err; diff --git a/crates/nu-parser/src/lib.rs b/crates/nu-parser/src/lib.rs index 018e00f2aa..ff980a2fc2 100644 --- a/crates/nu-parser/src/lib.rs +++ b/crates/nu-parser/src/lib.rs @@ -16,7 +16,7 @@ pub use flatten::{ flatten_block, flatten_expression, flatten_pipeline, flatten_pipeline_element, FlatShape, }; pub use known_external::KnownExternal; -pub use lex::{lex, Token, TokenContents}; +pub use lex::{lex, lex_signature, Token, TokenContents}; pub use lite_parser::{lite_parse, LiteBlock, LiteElement}; pub use parse_keywords::*; diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index a44148121d..788bafbcf0 100644 
--- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -1,6 +1,6 @@ use crate::{ eval::{eval_constant, value_as_string}, - lex, + lex::{lex, lex_signature}, lite_parser::{lite_parse, LiteCommand, LiteElement}, parse_mut, parse_patterns::{parse_match_pattern, parse_pattern}, @@ -3039,6 +3039,8 @@ pub fn parse_shape_name( bytes: &[u8], span: Span, ) -> (SyntaxShape, Option) { + let mut error = None; + let result = match bytes { b"any" => SyntaxShape::Any, b"binary" => SyntaxShape::Binary, @@ -3060,7 +3062,11 @@ pub fn parse_shape_name( b"int" => SyntaxShape::Int, b"import-pattern" => SyntaxShape::ImportPattern, b"keyword" => SyntaxShape::Keyword(vec![], Box::new(SyntaxShape::Any)), - b"list" => SyntaxShape::List(Box::new(SyntaxShape::Any)), + _ if bytes.starts_with(b"list") => { + let (sig, err) = parse_list_shape(working_set, bytes, span); + error = error.or(err); + sig + } b"math" => SyntaxShape::MathExpression, b"nothing" => SyntaxShape::Nothing, b"number" => SyntaxShape::Number, @@ -3104,7 +3110,51 @@ pub fn parse_shape_name( } }; - (result, None) + (result, error) +} + +fn parse_list_shape( + working_set: &StateWorkingSet, + bytes: &[u8], + span: Span, +) -> (SyntaxShape, Option) { + assert!(bytes.starts_with(b"list")); + + if bytes == b"list" { + (SyntaxShape::List(Box::new(SyntaxShape::Any)), None) + } else if bytes.starts_with(b"list<") { + let start = span.start + 5; + + // if the annotation is unterminated, we want to return early to avoid + // overflows with spans + let end = if bytes.ends_with(b">") { + span.end - 1 + } else { + let err = ParseError::Unclosed(">".into(), span); + return (SyntaxShape::List(Box::new(SyntaxShape::Any)), Some(err)); + }; + + let inner_span = Span::new(start, end); + + let inner_text = String::from_utf8_lossy(working_set.get_span_contents(inner_span)); + + // remove any extra whitespace, for example `list< string >` becomes `list` + let inner_bytes = inner_text.trim().as_bytes(); + + // list<> + if 
inner_bytes.is_empty() { + (SyntaxShape::List(Box::new(SyntaxShape::Any)), None) + } else { + let (inner_sig, err) = parse_shape_name(working_set, inner_bytes, inner_span); + + (SyntaxShape::List(Box::new(inner_sig)), err) + } + } else { + ( + SyntaxShape::List(Box::new(SyntaxShape::Any)), + Some(ParseError::UnknownType(span)), + ) + } } pub fn parse_type(_working_set: &StateWorkingSet, bytes: &[u8]) -> Type { @@ -3518,13 +3568,14 @@ pub fn parse_signature_helper( let mut error = None; let source = working_set.get_span_contents(span); - let (output, err) = lex( + let (output, err) = lex_signature( source, span.start, &[b'\n', b'\r'], &[b':', b'=', b','], false, ); + error = error.or(err); let mut args: Vec = vec![]; diff --git a/crates/nu-parser/tests/test_lex.rs b/crates/nu-parser/tests/test_lex.rs index bbbb33e4af..ef843f9131 100644 --- a/crates/nu-parser/tests/test_lex.rs +++ b/crates/nu-parser/tests/test_lex.rs @@ -1,4 +1,4 @@ -use nu_parser::{lex, ParseError, Token, TokenContents}; +use nu_parser::{lex, lex_signature, ParseError, Token, TokenContents}; use nu_protocol::Span; #[test] @@ -22,6 +22,100 @@ fn lex_newline() { })); } +#[test] +fn lex_annotations_list() { + let file = b"items: list"; + + let (output, err) = lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', b','], false); + + assert!(err.is_none()); + assert_eq!(output.len(), 3); +} + +#[test] +fn lex_annotations_record() { + let file = b"config: record"; + + let (output, err) = lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', b','], false); + + assert!(err.is_none()); + assert_eq!(output.len(), 3); +} + +#[test] +fn lex_annotations_empty() { + let file = b"items: list<>"; + + let (output, err) = lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', b','], false); + + assert!(err.is_none()); + assert_eq!(output.len(), 3); +} + +#[test] +fn lex_annotations_space_before_annotations() { + let file = b"items: list "; + + let (output, err) = lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', 
b','], false); + + assert!(err.is_none()); + assert_eq!(output.len(), 4); +} + +#[test] +fn lex_annotations_space_within_annotations() { + let file = b"items: list< string>"; + + let (output, err) = lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', b','], false); + + assert!(err.is_none()); + assert_eq!(output.len(), 3); + + let file = b"items: list"; + + let (output, err) = lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', b','], false); + + assert!(err.is_none()); + assert_eq!(output.len(), 3); + + let file = b"items: list< string >"; + + let (output, err) = lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', b','], false); + + assert!(err.is_none()); + assert_eq!(output.len(), 3); +} + +#[test] +fn lex_annotations_nested() { + let file = b"items: list>"; + + let (output, err) = lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', b','], false); + + assert!(err.is_none()); + assert_eq!(output.len(), 3); +} + +#[test] +fn lex_annotations_nested_unterminated() { + let file = b"items: list"; + + let (output, err) = lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', b','], false); + + assert!(matches!(err.unwrap(), ParseError::UnexpectedEof(_, _))); + assert_eq!(output.len(), 3); +} + +#[test] +fn lex_annotations_unterminated() { + let file = b"items: list TestResult { + let input = "def run [list: list] {$list | length}; run [2 5 4]"; + let expected = "3"; + run_test(input, expected) +} + +#[test] +fn list_annotations_unknown_prefix() -> TestResult { + let input = "def run [list: listint>] {$list | length}; run [2 5 4]"; + let expected = "unknown type"; + fail_test(input, expected) +} + +#[test] +fn list_annotations_empty_1() -> TestResult { + let input = "def run [list: list] {$list | length}; run [2 5 4]"; + let expected = "3"; + run_test(input, expected) +} + +#[test] +fn list_annotations_empty_2() -> TestResult { + let input = "def run [list: list<>] {$list | length}; run [2 5 4]"; + let expected = "3"; + run_test(input, expected) +} + +#[test] +fn 
list_annotations_empty_3() -> TestResult { + let input = "def run [list: list< >] {$list | length}; run [2 5 4]"; + let expected = "3"; + run_test(input, expected) +} + +#[test] +fn list_annotations_empty_4() -> TestResult { + let input = "def run [list: list<\n>] {$list | length}; run [2 5 4]"; + let expected = "3"; + run_test(input, expected) +} + +#[test] +fn list_annotations_nested() -> TestResult { + let input = "def run [list: list>] {$list | length}; run [ [2.0] [5.0] [4.0]]"; + let expected = "3"; + run_test(input, expected) +} + +#[test] +fn list_annotations_unknown_inner_type() -> TestResult { + let input = "def run [list: list] {$list | length}; run ['nushell' 'nunu' 'nana']"; + let expected = "unknown type"; + fail_test(input, expected) +} + +#[test] +fn list_annotations_nested_unknown_inner() -> TestResult { + let input = "def run [list: list>] {$list | length}; run [ [nushell] [nunu] [nana]]"; + let expected = "unknown type"; + fail_test(input, expected) +} + +#[test] +fn list_annotations_unterminated() -> TestResult { + let input = "def run [list: list TestResult { + let input = "def run [list: list] {$list | length}; run [2 5 4]"; + let expected = "expected closing >"; + fail_test(input, expected) +} + +#[test] +fn list_annotations_space_within_1() -> TestResult { + let input = "def run [list: list< range>] {$list | length}; run [2..32 5..<64 4..128]"; + let expected = "3"; + run_test(input, expected) +} + +#[test] +fn list_annotations_space_within_2() -> TestResult { + let input = "def run [list: list] {$list | length}; run [2 5 4]"; + let expected = "3"; + run_test(input, expected) +} + +#[test] +fn list_annotations_space_within_3() -> TestResult { + let input = "def run [list: list< int >] {$list | length}; run [2 5 4]"; + let expected = "3"; + run_test(input, expected) +} + +#[test] +fn list_annotations_space_before() -> TestResult { + let input = "def run [list: list ] {$list | length}; run [2 5 4]"; + let expected = "expected valid variable 
name for this parameter"; + fail_test(input, expected) +} + +#[test] +fn list_annotations_unknown_separators() -> TestResult { + let input = "def run [list: list] {$list | length}; run [2 5 4]"; + let expected = "unknown type"; + fail_test(input, expected) +}