allow lists to have type annotations (#8529)

this pr refines #8270 and closes #8109

# description
examples:

the original syntax is okay
```nu
def okay [nums: list] {}         # the type of list will be list<any>
```

empty annotations are allowed in any variation
the last two may be caught by a future formatter, 
but do not affect `nu` code currently
```nu
def okay [nums: list<>] {}       # okay

def okay [nums: list<     >] {}  # weird but also okay

def okay [nums: list<
>] {}                            # also weird but okay
```

types are allowed (See [notes](#notes) below)
```nu
def okay [nums: list<int>] {}    # `okay [a b c]` will throw an error

def okay [nums: list< int >] {}  # any amount of space within the angle brackets is okay

def err [nums: list <int>] {}    # this is not okay, `nums: list` and `<int>` will be parsed as
                                 # two separate params
```

nested annotations are allowed in many variations
```nu
def okay [items: list<list<int>>] {}

def okay [items: list<list>] {}
```

any unterminated annotation is caught
```nu
Error: nu::parser::unexpected_eof

  × Unexpected end of code.
   ╭─[source:1:1]
 1 │ def err [nums: list<int] {}
   ·                       ▲
   ·                       ╰── expected closing >
   ╰────
```

unknown types are flagged
```nu
Error: nu::parser::unknown_type

  × Unknown type.
   ╭─[source:1:1]
 1 │ def err [nums: list<str>] {}
   ·                     ─┬─
   ·                      ╰── unknown type
   ╰────

Error: nu::parser::unknown_type

  × Unknown type.
   ╭─[source:1:1]
 1 │ def err [nums: list<int, string>] {}
   ·                    ─────┬─────
   ·                          ╰── unknown type
   ╰────
```

# notes
the error message for mismatched types is not as intuitive
```nu
Error: nu::parser::parse_mismatch

  × Parse mismatch during operation.
   ╭─[source:1:1]
 1 │ def err [nums: list<int>] {}; err [a b c]
   ·                                    ┬
   ·                                    ╰── expected int
   ╰────
```
it should be something like this
```nu
Error: nu::parser::parse_mismatch

  × Parse mismatch during operation.
   ╭─[source:1:1]
 1 │ def err [nums: list<int>] {}; err [a b c]
   ·                                    ──┬──
   ·                                      ╰── expected list<int>
   ╰────
```
this is currently not implemented
This commit is contained in:
mike 2023-03-24 14:54:06 +03:00 committed by GitHub
parent d0aa69bfcb
commit 8cf9bc9993
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 311 additions and 18 deletions

View file

@ -150,18 +150,6 @@ fn def_fails_with_invalid_name() {
assert!(actual.err.contains(err_msg));
}
// Defines a command whose parameter is annotated `list<any>` and checks that
// the reported error output contains "unknown type".
// NOTE(review): the accompanying change makes `list<...>` annotations valid, so
// this expectation presumably no longer holds — confirm this test is meant to
// be removed.
#[test]
fn def_errors_with_specified_list_type() {
let actual = nu!(
cwd: ".", pipeline(
r#"
def test-command [ foo: list<any> ] {}
"#
));
assert!(actual.err.contains("unknown type"));
}
#[test]
fn def_with_list() {
Playground::setup("def_with_list", |dirs, _| {

View file

@ -31,6 +31,7 @@ pub enum BlockKind {
Paren,
CurlyBracket,
SquareBracket,
AngleBracket,
}
impl BlockKind {
@ -39,6 +40,7 @@ impl BlockKind {
BlockKind::Paren => b')',
BlockKind::SquareBracket => b']',
BlockKind::CurlyBracket => b'}',
BlockKind::AngleBracket => b'>',
}
}
}
@ -77,6 +79,7 @@ pub fn lex_item(
span_offset: usize,
additional_whitespace: &[u8],
special_tokens: &[u8],
in_signature: bool,
) -> (Token, Option<ParseError>) {
// This variable tracks the starting character of a string literal, so that
// we remain inside the string literal lexer mode until we encounter the
@ -156,6 +159,12 @@ pub fn lex_item(
} else if c == b'[' {
// We encountered an opening `[` delimiter.
block_level.push(BlockKind::SquareBracket);
} else if c == b'<' && in_signature {
block_level.push(BlockKind::AngleBracket);
} else if c == b'>' && in_signature {
if let Some(BlockKind::AngleBracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == b']' {
// We encountered a closing `]` delimiter. Pop off the opening `[`
// delimiter.
@ -299,12 +308,48 @@ pub fn lex_item(
}
}
/// Lexes `input` in signature mode.
///
/// Every argument is forwarded unchanged to `lex_internal`, with its
/// `in_signature` flag set to `true`.
pub fn lex_signature(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) {
    // The only thing this entry point decides is the signature flag.
    let in_signature = true;

    lex_internal(
        input,
        span_offset,
        additional_whitespace,
        special_tokens,
        skip_comment,
        in_signature,
    )
}
/// Lexes `input` outside of signature mode.
///
/// Every argument is forwarded unchanged to `lex_internal`, with its
/// `in_signature` flag set to `false`.
pub fn lex(
    input: &[u8],
    span_offset: usize,
    additional_whitespace: &[u8],
    special_tokens: &[u8],
    skip_comment: bool,
) -> (Vec<Token>, Option<ParseError>) {
    // The only thing this entry point decides is the signature flag.
    let in_signature = false;

    lex_internal(
        input,
        span_offset,
        additional_whitespace,
        special_tokens,
        skip_comment,
        in_signature,
    )
}
fn lex_internal(
input: &[u8],
span_offset: usize,
additional_whitespace: &[u8],
special_tokens: &[u8],
skip_comment: bool,
// within signatures we want to treat `<` and `>` specially
in_signature: bool,
) -> (Vec<Token>, Option<ParseError>) {
let mut error = None;
@ -427,6 +472,7 @@ pub fn lex(
span_offset,
additional_whitespace,
special_tokens,
in_signature,
);
if error.is_none() {
error = err;

View file

@ -16,7 +16,7 @@ pub use flatten::{
flatten_block, flatten_expression, flatten_pipeline, flatten_pipeline_element, FlatShape,
};
pub use known_external::KnownExternal;
pub use lex::{lex, Token, TokenContents};
pub use lex::{lex, lex_signature, Token, TokenContents};
pub use lite_parser::{lite_parse, LiteBlock, LiteElement};
pub use parse_keywords::*;

View file

@ -1,6 +1,6 @@
use crate::{
eval::{eval_constant, value_as_string},
lex,
lex::{lex, lex_signature},
lite_parser::{lite_parse, LiteCommand, LiteElement},
parse_mut,
parse_patterns::{parse_match_pattern, parse_pattern},
@ -3039,6 +3039,8 @@ pub fn parse_shape_name(
bytes: &[u8],
span: Span,
) -> (SyntaxShape, Option<ParseError>) {
let mut error = None;
let result = match bytes {
b"any" => SyntaxShape::Any,
b"binary" => SyntaxShape::Binary,
@ -3060,7 +3062,11 @@ pub fn parse_shape_name(
b"int" => SyntaxShape::Int,
b"import-pattern" => SyntaxShape::ImportPattern,
b"keyword" => SyntaxShape::Keyword(vec![], Box::new(SyntaxShape::Any)),
b"list" => SyntaxShape::List(Box::new(SyntaxShape::Any)),
_ if bytes.starts_with(b"list") => {
let (sig, err) = parse_list_shape(working_set, bytes, span);
error = error.or(err);
sig
}
b"math" => SyntaxShape::MathExpression,
b"nothing" => SyntaxShape::Nothing,
b"number" => SyntaxShape::Number,
@ -3104,7 +3110,51 @@ pub fn parse_shape_name(
}
};
(result, None)
(result, error)
}
/// Parses a `list` shape annotation such as `list`, `list<>` or `list<int>`.
///
/// `bytes` is the annotation text and `span` is where that text starts in the
/// source. Returns the parsed shape together with an optional error:
///
/// * `list`, `list<>` and `list<   >` produce a list of `SyntaxShape::Any`.
/// * `list<T>` produces a list of whatever `parse_shape_name` returns for `T`
///   (ASCII whitespace around `T` is ignored) and forwards its error.
/// * `list<...` without a trailing `>` produces a list of `SyntaxShape::Any`
///   plus `ParseError::Unclosed`.
/// * anything else starting with `list` produces a list of `SyntaxShape::Any`
///   plus `ParseError::UnknownType`.
///
/// # Panics
///
/// Panics if `bytes` does not start with `list`.
fn parse_list_shape(
    working_set: &StateWorkingSet,
    bytes: &[u8],
    span: Span,
) -> (SyntaxShape, Option<ParseError>) {
    const PREFIX: &[u8] = b"list<";

    assert!(bytes.starts_with(b"list"));

    let list_of_any = || SyntaxShape::List(Box::new(SyntaxShape::Any));

    if bytes == b"list" {
        return (list_of_any(), None);
    }

    let after_prefix = match bytes.strip_prefix(PREFIX) {
        Some(rest) => rest,
        None => return (list_of_any(), Some(ParseError::UnknownType(span))),
    };

    // An unterminated annotation is reported right away, before any inner
    // span arithmetic is attempted.
    let inner = match after_prefix.strip_suffix(b">") {
        Some(inner) => inner,
        None => {
            let err = ParseError::Unclosed(">".into(), span);
            return (list_of_any(), Some(err));
        }
    };

    // Trim at the byte level and remember how much was removed from the
    // front, so the span handed to the recursive call covers exactly the
    // trimmed text. Passing an untrimmed span together with trimmed bytes
    // would make a nested `list< list<int> >` compute its own inner span
    // from the wrong offsets.
    let leading = inner
        .iter()
        .take_while(|b| b.is_ascii_whitespace())
        .count();
    let without_leading = &inner[leading..];
    let trailing = without_leading
        .iter()
        .rev()
        .take_while(|b| b.is_ascii_whitespace())
        .count();
    let trimmed = &without_leading[..without_leading.len() - trailing];

    // `list<>` and `list<   >`
    if trimmed.is_empty() {
        return (list_of_any(), None);
    }

    let inner_start = span.start + PREFIX.len() + leading;
    let inner_span = Span::new(inner_start, inner_start + trimmed.len());

    let (inner_shape, err) = parse_shape_name(working_set, trimmed, inner_span);
    (SyntaxShape::List(Box::new(inner_shape)), err)
}
pub fn parse_type(_working_set: &StateWorkingSet, bytes: &[u8]) -> Type {
@ -3518,13 +3568,14 @@ pub fn parse_signature_helper(
let mut error = None;
let source = working_set.get_span_contents(span);
let (output, err) = lex(
let (output, err) = lex_signature(
source,
span.start,
&[b'\n', b'\r'],
&[b':', b'=', b','],
false,
);
error = error.or(err);
let mut args: Vec<Arg> = vec![];

View file

@ -1,4 +1,4 @@
use nu_parser::{lex, ParseError, Token, TokenContents};
use nu_parser::{lex, lex_signature, ParseError, Token, TokenContents};
use nu_protocol::Span;
#[test]
@ -22,6 +22,100 @@ fn lex_newline() {
}));
}
/// `items: list<string>` lexes in signature mode without error into 3 tokens.
#[test]
fn lex_annotations_list() {
    let whitespace = [b'\n', b'\r'];
    let specials = [b':', b'=', b','];

    let (tokens, error) = lex_signature(b"items: list<string>", 0, &whitespace, &specials, false);

    assert!(error.is_none());
    assert_eq!(tokens.len(), 3);
}
/// `config: record<name: string>` lexes in signature mode without error into
/// 3 tokens.
#[test]
fn lex_annotations_record() {
    let whitespace = [b'\n', b'\r'];
    let specials = [b':', b'=', b','];

    let (tokens, error) = lex_signature(
        b"config: record<name: string>",
        0,
        &whitespace,
        &specials,
        false,
    );

    assert!(error.is_none());
    assert_eq!(tokens.len(), 3);
}
/// An empty annotation, `items: list<>`, lexes in signature mode without
/// error into 3 tokens.
#[test]
fn lex_annotations_empty() {
    let whitespace = [b'\n', b'\r'];
    let specials = [b':', b'=', b','];

    let (tokens, error) = lex_signature(b"items: list<>", 0, &whitespace, &specials, false);

    assert!(error.is_none());
    assert_eq!(tokens.len(), 3);
}
/// With a space before the angle bracket, `items: list <string>` lexes
/// without error but yields 4 tokens.
#[test]
fn lex_annotations_space_before_annotations() {
    let whitespace = [b'\n', b'\r'];
    let specials = [b':', b'=', b','];

    let (tokens, error) = lex_signature(b"items: list <string>", 0, &whitespace, &specials, false);

    assert!(error.is_none());
    assert_eq!(tokens.len(), 4);
}
/// Spaces inside the angle brackets (leading, trailing, or both) still lex
/// without error into 3 tokens.
#[test]
fn lex_annotations_space_within_annotations() {
    let whitespace = [b'\n', b'\r'];
    let specials = [b':', b'=', b','];

    // Checked in the same order as listed here.
    let sources: [&[u8]; 3] = [
        b"items: list< string>",
        b"items: list<string >",
        b"items: list< string >",
    ];

    for source in sources.iter().copied() {
        let (tokens, error) = lex_signature(source, 0, &whitespace, &specials, false);

        assert!(error.is_none());
        assert_eq!(tokens.len(), 3);
    }
}
/// A nested annotation, `items: list<record<name: string>>`, lexes without
/// error into 3 tokens.
#[test]
fn lex_annotations_nested() {
    let whitespace = [b'\n', b'\r'];
    let specials = [b':', b'=', b','];

    let (tokens, error) = lex_signature(
        b"items: list<record<name: string>>",
        0,
        &whitespace,
        &specials,
        false,
    );

    assert!(error.is_none());
    assert_eq!(tokens.len(), 3);
}
/// A nested annotation missing one closing `>` reports
/// `ParseError::UnexpectedEof` while still producing 3 tokens.
#[test]
fn lex_annotations_nested_unterminated() {
    let whitespace = [b'\n', b'\r'];
    let specials = [b':', b'=', b','];

    let (tokens, error) = lex_signature(
        b"items: list<record<name: string>",
        0,
        &whitespace,
        &specials,
        false,
    );

    assert!(matches!(error, Some(ParseError::UnexpectedEof(_, _))));
    assert_eq!(tokens.len(), 3);
}
/// An annotation with no closing `>` reports `ParseError::UnexpectedEof`
/// while still producing 3 tokens.
#[test]
fn lex_annotations_unterminated() {
    let whitespace = [b'\n', b'\r'];
    let specials = [b':', b'=', b','];

    let (tokens, error) = lex_signature(b"items: list<string", 0, &whitespace, &specials, false);

    assert!(matches!(error, Some(ParseError::UnexpectedEof(_, _))));
    assert_eq!(tokens.len(), 3);
}
#[test]
fn lex_empty() {
let file = b"";

View file

@ -15,6 +15,7 @@ mod test_modules;
mod test_parser;
mod test_ranges;
mod test_regex;
mod test_signatures;
mod test_strings;
mod test_table_operations;
mod test_type_check;

View file

@ -0,0 +1,113 @@
use crate::tests::{fail_test, run_test, TestResult};
/// A `list<int>` parameter given `[2 5 4]` is expected to output `3`.
#[test]
fn list_annotations() -> TestResult {
    run_test(
        "def run [list: list<int>] {$list | length}; run [2 5 4]",
        "3",
    )
}
/// `listint>` (no opening `<`) is expected to fail with "unknown type".
#[test]
fn list_annotations_unknown_prefix() -> TestResult {
    fail_test(
        "def run [list: listint>] {$list | length}; run [2 5 4]",
        "unknown type",
    )
}
/// A bare `list` annotation given `[2 5 4]` is expected to output `3`.
#[test]
fn list_annotations_empty_1() -> TestResult {
    run_test(
        "def run [list: list] {$list | length}; run [2 5 4]",
        "3",
    )
}
/// An empty `list<>` annotation given `[2 5 4]` is expected to output `3`.
#[test]
fn list_annotations_empty_2() -> TestResult {
    run_test(
        "def run [list: list<>] {$list | length}; run [2 5 4]",
        "3",
    )
}
/// A `list< >` annotation holding only a space is expected to output `3`
/// for `[2 5 4]`.
#[test]
fn list_annotations_empty_3() -> TestResult {
    run_test(
        "def run [list: list< >] {$list | length}; run [2 5 4]",
        "3",
    )
}
/// A `list<` ... `>` annotation holding only a newline is expected to output
/// `3` for `[2 5 4]`.
#[test]
fn list_annotations_empty_4() -> TestResult {
    run_test(
        "def run [list: list<\n>] {$list | length}; run [2 5 4]",
        "3",
    )
}
/// A nested `list<list<float>>` parameter given three one-element lists is
/// expected to output `3`.
#[test]
fn list_annotations_nested() -> TestResult {
    run_test(
        "def run [list: list<list<float>>] {$list | length}; run [ [2.0] [5.0] [4.0]]",
        "3",
    )
}
/// `list<str>` is expected to fail with "unknown type".
#[test]
fn list_annotations_unknown_inner_type() -> TestResult {
    fail_test(
        "def run [list: list<str>] {$list | length}; run ['nushell' 'nunu' 'nana']",
        "unknown type",
    )
}
/// `list<list<str>>` is expected to fail with "unknown type".
#[test]
fn list_annotations_nested_unknown_inner() -> TestResult {
    fail_test(
        "def run [list: list<list<str>>] {$list | length}; run [ [nushell] [nunu] [nana]]",
        "unknown type",
    )
}
/// `list<string` with no closing `>` is expected to fail with
/// "expected closing >".
#[test]
fn list_annotations_unterminated() -> TestResult {
    fail_test(
        "def run [list: list<string] {$list | length}; run [nu she ll]",
        "expected closing >",
    )
}
/// `list<list<>` with one closing `>` missing is expected to fail with
/// "expected closing >".
#[test]
fn list_annotations_nested_unterminated() -> TestResult {
    fail_test(
        "def run [list: list<list<>] {$list | length}; run [2 5 4]",
        "expected closing >",
    )
}
/// `list< range>` (space after `<`) given three ranges is expected to
/// output `3`.
#[test]
fn list_annotations_space_within_1() -> TestResult {
    run_test(
        "def run [list: list< range>] {$list | length}; run [2..32 5..<64 4..128]",
        "3",
    )
}
/// `list<number >` (space before `>`) given `[2 5 4]` is expected to
/// output `3`.
#[test]
fn list_annotations_space_within_2() -> TestResult {
    run_test(
        "def run [list: list<number >] {$list | length}; run [2 5 4]",
        "3",
    )
}
/// `list< int >` (spaces on both sides) given `[2 5 4]` is expected to
/// output `3`.
#[test]
fn list_annotations_space_within_3() -> TestResult {
    run_test(
        "def run [list: list< int >] {$list | length}; run [2 5 4]",
        "3",
    )
}
/// `list <int>` (space before `<`) is expected to fail with
/// "expected valid variable name for this parameter".
#[test]
fn list_annotations_space_before() -> TestResult {
    fail_test(
        "def run [list: list <int>] {$list | length}; run [2 5 4]",
        "expected valid variable name for this parameter",
    )
}
/// `list<int, string>` is expected to fail with "unknown type".
#[test]
fn list_annotations_unknown_separators() -> TestResult {
    fail_test(
        "def run [list: list<int, string>] {$list | length}; run [2 5 4]",
        "unknown type",
    )
}