allow lists to have type annotations (#8529)

this pr refines #8270 and closes #8109 # description examples: the original syntax is okay ```nu def okay [nums: list] {} # the type of list will be list<any> ``` empty annotations are allowed in any variation the last two may be caught by a future formatter, but do not affect `nu` code currently ```nu def okay [nums: list<>] {} # okay def okay [nums: list< >] {} # weird but also okay def okay [nums: list< >] {} # also weird but okay ``` types are allowed (See [notes](#notes) below) ```nu def okay [nums: list<int>] {} # `test [a b c]` will throw an error def okay [nums: list< int >] {} # any amount of space within the angle brackets is okay def err [nums: list <int>] {} # this is not okay, `nums` and `<int>` will be parsed as # two separate params, ``` nested annotations are allowed in many variations ```nu def okay [items: list<list<int>>] {} def okay [items: list<list>] {} ``` any unterminated annotation is caught ```nu Error: nu::parser::unexpected_eof × Unexpected end of code. ╭─[source:1:1] 1 │ def err [nums: list<int] {} · ▲ · ╰── expected closing > ╰──── ``` unknown types are flagged ```nu Error: nu::parser::unknown_type × Unknown type. ╭─[source:1:1] 1 │ def err [nums: list<str>] {} · ─┬─ · ╰── unknown type ╰──── Error: nu::parser::unknown_type × Unknown type. ╭─[source:1:1] 1 │ def err [nums: list<int, string>] {} · ─────┬───── · ╰── unknown type ╰──── ``` # notes the error message for mismatched types is not as intuitive ```nu Error: nu::parser::parse_mismatch × Parse mismatch during operation. ╭─[source:1:1] 1 │ def err [nums: list<int>] {}; err [a b c] · ┬ · ╰── expected int ╰──── ``` it should be something like this ```nu Error: nu::parser::parse_mismatch × Parse mismatch during operation. ╭─[source:1:1] 1 │ def err [nums: list<int>] {}; err [a b c] · ──┬── · ╰── expected list<int> ╰──── ``` this is currently not implemented
2024-12-26 13:03:07 +00:00 · 2023-03-24 14:54:06 +03:00 · 2023-03-24 14:54:06 +03:00 · 8cf9bc9993
commit 8cf9bc9993
parent d0aa69bfcb
7 changed files with 311 additions and 18 deletions
--- a/crates/nu-command/tests/commands/def.rs
+++ b/crates/nu-command/tests/commands/def.rs
@ -150,18 +150,6 @@ fn def_fails_with_invalid_name() {
    assert!(actual.err.contains(err_msg));
 }

-#[test]
-fn def_errors_with_specified_list_type() {
-    let actual = nu!(
-        cwd: ".", pipeline(
-        r#"
-        def test-command [ foo: list<any> ] {}
-        "#
-    ));
-
-    assert!(actual.err.contains("unknown type"));
-}
-
 #[test]
 fn def_with_list() {
    Playground::setup("def_with_list", |dirs, _| {
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@ -31,6 +31,7 @@ pub enum BlockKind {
    Paren,
    CurlyBracket,
    SquareBracket,
+    AngleBracket,
 }

 impl BlockKind {
@ -39,6 +40,7 @@ impl BlockKind {
            BlockKind::Paren => b')',
            BlockKind::SquareBracket => b']',
            BlockKind::CurlyBracket => b'}',
+            BlockKind::AngleBracket => b'>',
        }
    }
 }
@ -77,6 +79,7 @@ pub fn lex_item(
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
+    in_signature: bool,
 ) -> (Token, Option<ParseError>) {
    // This variable tracks the starting character of a string literal, so that
    // we remain inside the string literal lexer mode until we encounter the
@ -156,6 +159,12 @@ pub fn lex_item(
        } else if c == b'[' {
            // We encountered an opening `[` delimiter.
            block_level.push(BlockKind::SquareBracket);
+        } else if c == b'<' && in_signature {
+            block_level.push(BlockKind::AngleBracket);
+        } else if c == b'>' && in_signature {
+            if let Some(BlockKind::AngleBracket) = block_level.last() {
+                let _ = block_level.pop();
+            }
        } else if c == b']' {
            // We encountered a closing `]` delimiter. Pop off the opening `[`
            // delimiter.
@ -299,12 +308,48 @@ pub fn lex_item(
    }
 }

+/// Lexes `input` in signature mode.
+///
+/// Takes the same arguments as `lex` and forwards them unchanged to
+/// `lex_internal`, but with `in_signature` set to `true`. In that mode
+/// `lex_item` tracks `<` / `>` as an angle-bracket block, the same way it
+/// tracks `[`.
+///
+/// Returns the lexed tokens together with an optional parse error.
+pub fn lex_signature(
+    input: &[u8],
+    span_offset: usize,
+    additional_whitespace: &[u8],
+    special_tokens: &[u8],
+    skip_comment: bool,
+) -> (Vec<Token>, Option<ParseError>) {
+    lex_internal(
+        input,
+        span_offset,
+        additional_whitespace,
+        special_tokens,
+        skip_comment,
+        true,
+    )
+}
+
+/// Lexes `input` in the default (non-signature) mode.
+///
+/// Forwards every argument unchanged to `lex_internal` with `in_signature`
+/// set to `false`, which leaves the `<` / `>` angle-bracket tracking in
+/// `lex_item` switched off.
+///
+/// Returns the lexed tokens together with an optional parse error.
 pub fn lex(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
+) -> (Vec<Token>, Option<ParseError>) {
+    lex_internal(
+        input,
+        span_offset,
+        additional_whitespace,
+        special_tokens,
+        skip_comment,
+        false,
+    )
+}
+
+fn lex_internal(
+    input: &[u8],
+    span_offset: usize,
+    additional_whitespace: &[u8],
+    special_tokens: &[u8],
+    skip_comment: bool,
+    // within signatures we want to treat `<` and `>` specially
+    in_signature: bool,
 ) -> (Vec<Token>, Option<ParseError>) {
    let mut error = None;

@ -427,6 +472,7 @@ pub fn lex(
                span_offset,
                additional_whitespace,
                special_tokens,
+                in_signature,
            );
            if error.is_none() {
                error = err;
--- a/crates/nu-parser/src/lib.rs
+++ b/crates/nu-parser/src/lib.rs
@ -16,7 +16,7 @@ pub use flatten::{
    flatten_block, flatten_expression, flatten_pipeline, flatten_pipeline_element, FlatShape,
 };
 pub use known_external::KnownExternal;
-pub use lex::{lex, Token, TokenContents};
+pub use lex::{lex, lex_signature, Token, TokenContents};
 pub use lite_parser::{lite_parse, LiteBlock, LiteElement};
 pub use parse_keywords::*;

--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@ -1,6 +1,6 @@
 use crate::{
    eval::{eval_constant, value_as_string},
-    lex,
+    lex::{lex, lex_signature},
    lite_parser::{lite_parse, LiteCommand, LiteElement},
    parse_mut,
    parse_patterns::{parse_match_pattern, parse_pattern},
@ -3039,6 +3039,8 @@ pub fn parse_shape_name(
    bytes: &[u8],
    span: Span,
 ) -> (SyntaxShape, Option<ParseError>) {
+    let mut error = None;
+
    let result = match bytes {
        b"any" => SyntaxShape::Any,
        b"binary" => SyntaxShape::Binary,
@ -3060,7 +3062,11 @@ pub fn parse_shape_name(
        b"int" => SyntaxShape::Int,
        b"import-pattern" => SyntaxShape::ImportPattern,
        b"keyword" => SyntaxShape::Keyword(vec![], Box::new(SyntaxShape::Any)),
-        b"list" => SyntaxShape::List(Box::new(SyntaxShape::Any)),
+        _ if bytes.starts_with(b"list") => {
+            let (sig, err) = parse_list_shape(working_set, bytes, span);
+            error = error.or(err);
+            sig
+        }
        b"math" => SyntaxShape::MathExpression,
        b"nothing" => SyntaxShape::Nothing,
        b"number" => SyntaxShape::Number,
@ -3104,7 +3110,51 @@ pub fn parse_shape_name(
        }
    };

-    (result, None)
+    (result, error)
+}
+
+/// Parses a `list` type annotation (`list`, `list<>`, `list<int>`,
+/// `list<list<int>>`, ...) into a `SyntaxShape::List`.
+///
+/// `bytes` is the annotation text and must start with `list` (asserted).
+/// `span` is expected to cover exactly those bytes: the span of the inner
+/// annotation is derived from `span.start + 5` and `span.end - 1`.
+///
+/// Returns the parsed shape and an optional error:
+/// - a bare `list` or an empty `list<>` yields `list<any>` with no error;
+/// - `list<inner>` yields a list of whatever `parse_shape_name` returns for
+///   `inner`, forwarding that call's error;
+/// - `list<` without a closing `>` yields `list<any>` and
+///   `ParseError::Unclosed`;
+/// - any other text after `list` (e.g. `listint>`) yields `list<any>` and
+///   `ParseError::UnknownType`.
+fn parse_list_shape(
+    working_set: &StateWorkingSet,
+    bytes: &[u8],
+    span: Span,
+) -> (SyntaxShape, Option<ParseError>) {
+    assert!(bytes.starts_with(b"list"));
+
+    let list_of_any = || SyntaxShape::List(Box::new(SyntaxShape::Any));
+
+    if bytes == b"list" {
+        return (list_of_any(), None);
+    }
+
+    if !bytes.starts_with(b"list<") {
+        return (list_of_any(), Some(ParseError::UnknownType(span)));
+    }
+
+    // if the annotation is unterminated, we want to return early to avoid
+    // computing an inner span whose end lies before its start
+    if !bytes.ends_with(b">") {
+        let err = ParseError::Unclosed(">".into(), span);
+        return (list_of_any(), Some(err));
+    }
+
+    // everything between `list<` and the closing `>`
+    let inner_span = Span::new(span.start + 5, span.end - 1);
+    let inner_text = String::from_utf8_lossy(working_set.get_span_contents(inner_span));
+
+    // remove any extra whitespace, for example `list< string >` becomes `list<string>`
+    let inner_trimmed = inner_text.trim();
+
+    // list<>
+    if inner_trimmed.is_empty() {
+        return (list_of_any(), None);
+    }
+
+    // Shrink the span by the same amount of whitespace that was trimmed from
+    // the text. The recursive call derives its own offsets from the span it
+    // is given, so span and bytes must describe the same range; otherwise a
+    // padded nested annotation such as `list< list<int> >` is sliced at the
+    // wrong positions.
+    let leading = inner_text.len() - inner_text.trim_start().len();
+    let trailing = inner_text.len() - inner_text.trim_end().len();
+    let trimmed_span = Span::new(inner_span.start + leading, inner_span.end - trailing);
+
+    let (inner_sig, err) =
+        parse_shape_name(working_set, inner_trimmed.as_bytes(), trimmed_span);
+
+    (SyntaxShape::List(Box::new(inner_sig)), err)
+}

 pub fn parse_type(_working_set: &StateWorkingSet, bytes: &[u8]) -> Type {
@ -3518,13 +3568,14 @@ pub fn parse_signature_helper(
    let mut error = None;
    let source = working_set.get_span_contents(span);

-    let (output, err) = lex(
+    let (output, err) = lex_signature(
        source,
        span.start,
        &[b'\n', b'\r'],
        &[b':', b'=', b','],
        false,
    );
+
    error = error.or(err);

    let mut args: Vec<Arg> = vec![];
--- a/crates/nu-parser/tests/test_lex.rs
+++ b/crates/nu-parser/tests/test_lex.rs
@ -1,4 +1,4 @@
-use nu_parser::{lex, ParseError, Token, TokenContents};
+use nu_parser::{lex, lex_signature, ParseError, Token, TokenContents};
 use nu_protocol::Span;

 #[test]
@ -22,6 +22,100 @@ fn lex_newline() {
    }));
 }

+/// `items: list<string>` lexes without error into three tokens.
+#[test]
+fn lex_annotations_list() {
+    let (tokens, error) = lex_signature(
+        b"items: list<string>",
+        0,
+        &[b'\n', b'\r'],
+        &[b':', b'=', b','],
+        false,
+    );
+
+    assert!(error.is_none());
+    assert_eq!(tokens.len(), 3);
+}
+
+/// The `:` and space inside `record<name: string>` do not split the
+/// annotation: the input still lexes into three tokens.
+#[test]
+fn lex_annotations_record() {
+    let (tokens, error) = lex_signature(
+        b"config: record<name: string>",
+        0,
+        &[b'\n', b'\r'],
+        &[b':', b'=', b','],
+        false,
+    );
+
+    assert!(error.is_none());
+    assert_eq!(tokens.len(), 3);
+}
+
+/// An empty annotation, `list<>`, lexes without error into three tokens.
+#[test]
+fn lex_annotations_empty() {
+    let (tokens, error) = lex_signature(
+        b"items: list<>",
+        0,
+        &[b'\n', b'\r'],
+        &[b':', b'=', b','],
+        false,
+    );
+
+    assert!(error.is_none());
+    assert_eq!(tokens.len(), 3);
+}
+
+/// A space between `list` and `<string>` splits them apart, so the input
+/// lexes into four tokens instead of three.
+#[test]
+fn lex_annotations_space_before_annotations() {
+    let (tokens, error) = lex_signature(
+        b"items: list <string>",
+        0,
+        &[b'\n', b'\r'],
+        &[b':', b'=', b','],
+        false,
+    );
+
+    assert!(error.is_none());
+    assert_eq!(tokens.len(), 4);
+}
+
+/// Spaces inside the angle brackets (leading, trailing, or both) keep the
+/// annotation in one piece: each input lexes into three tokens.
+#[test]
+fn lex_annotations_space_within_annotations() {
+    let cases: [&[u8]; 3] = [
+        b"items: list< string>",
+        b"items: list<string >",
+        b"items: list< string >",
+    ];
+
+    for &file in &cases {
+        let (tokens, error) =
+            lex_signature(file, 0, &[b'\n', b'\r'], &[b':', b'=', b','], false);
+
+        assert!(error.is_none());
+        assert_eq!(tokens.len(), 3);
+    }
+}
+
+/// A nested annotation, `list<record<name: string>>`, lexes without error
+/// into three tokens.
+#[test]
+fn lex_annotations_nested() {
+    let (tokens, error) = lex_signature(
+        b"items: list<record<name: string>>",
+        0,
+        &[b'\n', b'\r'],
+        &[b':', b'=', b','],
+        false,
+    );
+
+    assert!(error.is_none());
+    assert_eq!(tokens.len(), 3);
+}
+
+/// A nested annotation missing one closing `>` reports `UnexpectedEof` while
+/// still producing three tokens.
+#[test]
+fn lex_annotations_nested_unterminated() {
+    let (tokens, error) = lex_signature(
+        b"items: list<record<name: string>",
+        0,
+        &[b'\n', b'\r'],
+        &[b':', b'=', b','],
+        false,
+    );
+
+    assert!(matches!(error, Some(ParseError::UnexpectedEof(_, _))));
+    assert_eq!(tokens.len(), 3);
+}
+
+/// An annotation with no closing `>` reports `UnexpectedEof` while still
+/// producing three tokens.
+#[test]
+fn lex_annotations_unterminated() {
+    let (tokens, error) = lex_signature(
+        b"items: list<string",
+        0,
+        &[b'\n', b'\r'],
+        &[b':', b'=', b','],
+        false,
+    );
+
+    assert!(matches!(error, Some(ParseError::UnexpectedEof(_, _))));
+    assert_eq!(tokens.len(), 3);
+}
+
 #[test]
 fn lex_empty() {
    let file = b"";
--- a/src/tests.rs
+++ b/src/tests.rs
@ -15,6 +15,7 @@ mod test_modules;
 mod test_parser;
 mod test_ranges;
 mod test_regex;
+mod test_signatures;
 mod test_strings;
 mod test_table_operations;
 mod test_type_check;
--- a/src/tests/test_signatures.rs
+++ b/src/tests/test_signatures.rs
@ -0,0 +1,113 @@
+use crate::tests::{fail_test, run_test, TestResult};
+
+/// A `list<int>` parameter accepts `[2 5 4]`.
+#[test]
+fn list_annotations() -> TestResult {
+    run_test(
+        "def run [list: list<int>] {$list | length}; run [2 5 4]",
+        "3",
+    )
+}
+
+/// `listint>` starts with `list` but is not a valid annotation, so it is
+/// reported as an unknown type.
+#[test]
+fn list_annotations_unknown_prefix() -> TestResult {
+    fail_test(
+        "def run [list: listint>] {$list | length}; run [2 5 4]",
+        "unknown type",
+    )
+}
+
+/// A bare `list` annotation is still accepted.
+#[test]
+fn list_annotations_empty_1() -> TestResult {
+    run_test(
+        "def run [list: list] {$list | length}; run [2 5 4]",
+        "3",
+    )
+}
+
+/// An empty `list<>` annotation is accepted.
+#[test]
+fn list_annotations_empty_2() -> TestResult {
+    run_test(
+        "def run [list: list<>] {$list | length}; run [2 5 4]",
+        "3",
+    )
+}
+
+/// An annotation containing only a space, `list< >`, is accepted.
+#[test]
+fn list_annotations_empty_3() -> TestResult {
+    run_test(
+        "def run [list: list< >] {$list | length}; run [2 5 4]",
+        "3",
+    )
+}
+
+/// An annotation containing only a newline between `<` and `>` is accepted.
+#[test]
+fn list_annotations_empty_4() -> TestResult {
+    run_test(
+        "def run [list: list<\n>] {$list | length}; run [2 5 4]",
+        "3",
+    )
+}
+
+/// A nested `list<list<float>>` annotation accepts a list of float lists.
+#[test]
+fn list_annotations_nested() -> TestResult {
+    run_test(
+        "def run [list: list<list<float>>] {$list | length}; run [ [2.0] [5.0] [4.0]]",
+        "3",
+    )
+}
+
+/// An unrecognised inner type (`str`) is reported as an unknown type.
+#[test]
+fn list_annotations_unknown_inner_type() -> TestResult {
+    fail_test(
+        "def run [list: list<str>] {$list | length}; run ['nushell' 'nunu' 'nana']",
+        "unknown type",
+    )
+}
+
+/// An unrecognised type nested two levels deep is still reported as an
+/// unknown type.
+#[test]
+fn list_annotations_nested_unknown_inner() -> TestResult {
+    fail_test(
+        "def run [list: list<list<str>>] {$list | length}; run [ [nushell] [nunu] [nana]]",
+        "unknown type",
+    )
+}
+
+/// A missing closing `>` is reported with "expected closing >".
+#[test]
+fn list_annotations_unterminated() -> TestResult {
+    fail_test(
+        "def run [list: list<string] {$list | length}; run [nu she ll]",
+        "expected closing >",
+    )
+}
+
+/// A nested annotation missing one closing `>` is reported with
+/// "expected closing >".
+#[test]
+fn list_annotations_nested_unterminated() -> TestResult {
+    fail_test(
+        "def run [list: list<list<>] {$list | length}; run [2 5 4]",
+        "expected closing >",
+    )
+}
+
+/// A space before the inner type (`list< range>`) is accepted.
+#[test]
+fn list_annotations_space_within_1() -> TestResult {
+    run_test(
+        "def run [list: list< range>] {$list | length}; run [2..32 5..<64 4..128]",
+        "3",
+    )
+}
+
+/// A space after the inner type (`list<number >`) is accepted.
+#[test]
+fn list_annotations_space_within_2() -> TestResult {
+    run_test(
+        "def run [list: list<number >] {$list | length}; run [2 5 4]",
+        "3",
+    )
+}
+
+/// Spaces on both sides of the inner type (`list< int >`) are accepted.
+#[test]
+fn list_annotations_space_within_3() -> TestResult {
+    run_test(
+        "def run [list: list< int >] {$list | length}; run [2 5 4]",
+        "3",
+    )
+}
+
+/// A space between `list` and `<int>` is rejected with an invalid
+/// parameter name error.
+#[test]
+fn list_annotations_space_before() -> TestResult {
+    fail_test(
+        "def run [list: list <int>] {$list | length}; run [2 5 4]",
+        "expected valid variable name for this parameter",
+    )
+}
+
+/// A comma-separated inner annotation (`list<int, string>`) is reported as
+/// an unknown type.
+#[test]
+fn list_annotations_unknown_separators() -> TestResult {
+    fail_test(
+        "def run [list: list<int, string>] {$list | length}; run [2 5 4]",
+        "unknown type",
+    )
+}