allow records to have type annotations (#8914)

# Description follow up to #8529 cleaned up version of #8892 - the original syntax is okay ```nu def okay [rec: record] {} ``` - you can now add type annotations for fields if you know them before hand ```nu def okay [rec: record<name: string>] {} ``` - you can specify multiple fields ```nu def okay [person: record<name: string age: int>] {} # an optional comma is allowed def okay [person: record<name: string, age: int>] {} ``` - if annotations are specified, any use of the command will be type checked against the specified type ```nu def unwrap [result: record<ok: bool, value: any>] {} unwrap {ok: 2, value: "value"} # errors with Error: nu::parser::type_mismatch × Type mismatch. ╭─[entry #4:1:1] 1 │ unwrap {ok: 2, value: "value"} · ───────┬───── · ╰── expected record<ok: bool, value: any>, found record<ok: int, value: string> ╰──── ``` > here the error is in the `ok` field, since `any` is coerced into any type > as a result `unwrap {ok: true, value: "value"}` is okay - the key must be a string, either quoted or unquoted ```nu def err [rec: record<{}: list>] {} # errors with Error: × `record` type annotations key not string ╭─[entry #7:1:1] 1 │ def unwrap [result: record<{}: bool, value: any>] {} · ─┬ · ╰── must be a string ╰──── ``` - a key doesn't have to have a type in which case it is assumed to be `any` ```nu def okay [person: record<name age>] {} def okay [person: record<name: string age>] {} ``` - however, if you put a colon, you have to specify a type ```nu def err [person: record<name: >] {} # errors with Error: nu::parser::parse_mismatch × Parse mismatch during operation. ╭─[entry #12:1:1] 1 │ def unwrap [res: record<name: >] { $res } · ┬ · ╰── expected type after colon ╰──── ``` # User-Facing Changes **[BREAKING CHANGES]** - this change adds a field to `SyntaxShape::Record` so any plugins that used it will have to update and include the field. though if you are unsure of the type the record expects, `SyntaxShape::Record(vec![])` will suffice
2024-12-26 04:53:09 +00:00 · 2023-04-26 16:16:55 +03:00 · 2023-04-26 16:16:55 +03:00 · 77ca73f414
commit 77ca73f414
parent 48c75831fc
8 changed files with 347 additions and 93 deletions
--- a/crates/nu-cmd-lang/src/core_commands/error_make.rs
+++ b/crates/nu-cmd-lang/src/core_commands/error_make.rs
@ -16,7 +16,11 @@ impl Command for ErrorMake {
    fn signature(&self) -> Signature {
        Signature::build("error make")
            .input_output_types(vec![(Type::Nothing, Type::Error)])
-            .required("error_struct", SyntaxShape::Record, "the error to create")
+            .required(
+                "error_struct",
+                SyntaxShape::Record(vec![]),
+                "the error to create",
+            )
            .switch(
                "unspanned",
                "remove the origin label from the error",
--- a/crates/nu-command/src/env/load_env.rs
+++ b/crates/nu-command/src/env/load_env.rs
@ -23,7 +23,7 @@ impl Command for LoadEnv {
            .allow_variants_without_examples(true)
            .optional(
                "update",
-                SyntaxShape::Record,
+                SyntaxShape::Record(vec![]),
                "the record to use for updates",
            )
            .category(Category::FileSystem)
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@ -2775,7 +2775,7 @@ pub fn parse_shape_name(
        b"operator" => SyntaxShape::Operator,
        b"path" => SyntaxShape::Filepath,
        b"range" => SyntaxShape::Range,
-        b"record" => SyntaxShape::Record,
+        _ if bytes.starts_with(b"record") => parse_collection_shape(working_set, bytes, span),
        b"signature" => SyntaxShape::Signature,
        b"string" => SyntaxShape::String,
        b"table" => SyntaxShape::Table,
@ -2814,38 +2814,129 @@ pub fn parse_shape_name(
    result
 }

+fn parse_collection_shape(
+    working_set: &mut StateWorkingSet,
+    bytes: &[u8],
+    span: Span,
+) -> SyntaxShape {
+    assert!(bytes.starts_with(b"record"));
+    let name = "record";
+    let mk_shape = SyntaxShape::Record;
+
+    if bytes == name.as_bytes() {
+        mk_shape(vec![])
+    } else if bytes.starts_with(b"record<") {
+        let Some(inner_span) = prepare_inner_span(working_set, bytes, span, 7) else {
+            return SyntaxShape::Any;
+        };
+
+        // record<> or table<>
+        if inner_span.end - inner_span.start == 0 {
+            return mk_shape(vec![]);
+        }
+        let source = working_set.get_span_contents(inner_span);
+        let (tokens, err) = lex_signature(
+            source,
+            inner_span.start,
+            &[b'\n', b'\r'],
+            &[b':', b','],
+            true,
+        );
+
+        if let Some(err) = err {
+            working_set.error(err);
+            // lexer errors cause issues with span overflows
+            return mk_shape(vec![]);
+        }
+
+        let mut sig = vec![];
+        let mut idx = 0;
+
+        let key_error = |span| {
+            ParseError::LabeledError(
+                format!("`{name}` type annotations key not string"),
+                "must be a string".into(),
+                span,
+            )
+        };
+
+        while idx < tokens.len() {
+            let TokenContents::Item = tokens[idx].contents else {
+                working_set.error(key_error(tokens[idx].span));
+                return mk_shape(vec![])
+            };
+
+            let key_bytes = working_set.get_span_contents(tokens[idx].span).to_vec();
+            if key_bytes.first().copied() == Some(b',') {
+                idx += 1;
+                continue;
+            }
+
+            let Some(key) = parse_value(working_set, tokens[idx].span, &SyntaxShape::String).as_string() else {
+                working_set.error(key_error(tokens[idx].span));
+                return mk_shape(vec![]);
+            };
+
+            // we want to allow such an annotation
+            // `record<name>` where the user leaves out the type
+            if idx + 1 == tokens.len() {
+                sig.push((key, SyntaxShape::Any));
+                break;
+            } else {
+                idx += 1;
+            }
+
+            let maybe_colon = working_set.get_span_contents(tokens[idx].span).to_vec();
+            match maybe_colon.as_slice() {
+                b":" => {
+                    if idx + 1 == tokens.len() {
+                        working_set.error(ParseError::Expected(
+                            "type after colon".into(),
+                            tokens[idx].span,
+                        ));
+                        break;
+                    } else {
+                        idx += 1;
+                    }
+                }
+                // a key provided without a type
+                b"," => {
+                    idx += 1;
+                    sig.push((key, SyntaxShape::Any));
+                    continue;
+                }
+                // a key provided without a type
+                _ => {
+                    sig.push((key, SyntaxShape::Any));
+                    continue;
+                }
+            }
+
+            let shape_bytes = working_set.get_span_contents(tokens[idx].span).to_vec();
+            let shape = parse_shape_name(working_set, &shape_bytes, tokens[idx].span);
+            sig.push((key, shape));
+            idx += 1;
+        }
+
+        mk_shape(sig)
+    } else {
+        working_set.error(ParseError::UnknownType(span));
+
+        SyntaxShape::Any
+    }
+}
+
 fn parse_list_shape(working_set: &mut StateWorkingSet, bytes: &[u8], span: Span) -> SyntaxShape {
    assert!(bytes.starts_with(b"list"));

    if bytes == b"list" {
        SyntaxShape::List(Box::new(SyntaxShape::Any))
    } else if bytes.starts_with(b"list<") {
-        let start = span.start + 5;
-
-        // if the annotation is unterminated, we want to return early to avoid
-        // overflows with spans
-        let end = if bytes.ends_with(b">") {
-            span.end - 1
-        // extra characters after the >
-        } else if bytes.contains(&b'>') {
-            let angle_start = bytes.split(|it| it == &b'>').collect::<Vec<_>>()[0].len() + 1;
-            let span = Span::new(span.start + angle_start, span.end);
-
-            working_set.error(ParseError::LabeledError(
-                "Extra characters in the parameter name".into(),
-                "extra characters".into(),
-                span,
-            ));
+        let Some(inner_span) = prepare_inner_span(working_set, bytes, span, 5) else {
            return SyntaxShape::Any;
-        } else {
-            working_set.error(ParseError::Unclosed(">".into(), span));
-            return SyntaxShape::List(Box::new(SyntaxShape::Any));
        };

-        let inner_span = Span::new(start, end);
-
        let inner_text = String::from_utf8_lossy(working_set.get_span_contents(inner_span));
-
        // remove any extra whitespace, for example `list< string >` becomes `list<string>`
        let inner_bytes = inner_text.trim().as_bytes().to_vec();

@ -2864,6 +2955,34 @@ fn parse_list_shape(working_set: &mut StateWorkingSet, bytes: &[u8], span: Span)
    }
 }

+fn prepare_inner_span(
+    working_set: &mut StateWorkingSet,
+    bytes: &[u8],
+    span: Span,
+    prefix_len: usize,
+) -> Option<Span> {
+    let start = span.start + prefix_len;
+
+    if bytes.ends_with(b">") {
+        let end = span.end - 1;
+        Some(Span::new(start, end))
+    } else if bytes.contains(&b'>') {
+        let angle_start = bytes.split(|it| it == &b'>').collect::<Vec<_>>()[0].len() + 1;
+        let span = Span::new(span.start + angle_start, span.end);
+
+        working_set.error(ParseError::LabeledError(
+            "Extra characters in the parameter name".into(),
+            "extra characters".into(),
+            span,
+        ));
+
+        None
+    } else {
+        working_set.error(ParseError::Unclosed(">".into(), span));
+        None
+    }
+}
+
 pub fn parse_type(_working_set: &StateWorkingSet, bytes: &[u8]) -> Type {
    match bytes {
        b"binary" => Type::Binary,
@ -3602,43 +3721,13 @@ pub fn parse_signature_helper(working_set: &mut StateWorkingSet, span: Span) ->
                                                    expression.ty.clone(),
                                                );
                                            }
-                                            Type::List(param_ty) => {
-                                                if let Type::List(expr_ty) = &expression.ty {
-                                                    if param_ty == expr_ty
-                                                        || **param_ty == Type::Any
-                                                    {
-                                                        working_set.set_variable_type(
-                                                            var_id,
-                                                            expression.ty.clone(),
-                                                        );
-                                                    } else {
-                                                        working_set.error(ParseError::AssignmentMismatch(
-                                                                        "Default value wrong type"
-                                                                            .into(),
-                                                                        format!(
-                                                                            "expected default value to be `{var_type}`",
-                                                                        ),
-                                                                        expression.span,
-                                                                    ),
-                                                                )
-                                                    }
-                                                } else {
-                                                    working_set.error(ParseError::AssignmentMismatch(
-                                                            "Default value wrong type".into(),
-                                                            format!(
-                                                                "expected default value to be `{var_type}`",
-                                                            ),
-                                                            expression.span,
-                                                        ))
-                                                }
-                                            }
-                                            t => {
-                                                if t != &expression.ty {
+                                            _ => {
+                                                if !type_compatible(var_type, &expression.ty) {
                                                    working_set.error(
                                                        ParseError::AssignmentMismatch(
                                                            "Default value wrong type".into(),
                                                            format!(
-                                                            "expected default value to be `{t}`"
+                                                            "expected default value to be `{var_type}`"
                                                        ),
                                                            expression.span,
                                                        ),
@ -3686,7 +3775,7 @@ pub fn parse_signature_helper(working_set: &mut StateWorkingSet, span: Span) ->
                                                                "Default value is the wrong type"
                                                                    .into(),
                                                                format!(
-                                                                    "default value should be {t}"
+                                                            "expected default value to be `{t}`"
                                                                ),
                                                                expression_span,
                                                            ),
@ -4485,7 +4574,7 @@ pub fn parse_value(

        // Be sure to return ParseError::Expected(..) if invoked for one of these shapes, but lex
        // stream doesn't start with '{'} -- parsing in SyntaxShape::Any arm depends on this error variant.
-        SyntaxShape::Block | SyntaxShape::Closure(..) | SyntaxShape::Record => {
+        SyntaxShape::Block | SyntaxShape::Closure(..) | SyntaxShape::Record(_) => {
            working_set.error(ParseError::Expected(
                "block, closure or record".into(),
                span,
@ -4505,7 +4594,7 @@ pub fn parse_value(
                    SyntaxShape::Duration,
                    SyntaxShape::Range,
                    SyntaxShape::DateTime, //FIXME requires 3 failed conversion attempts before failing
-                    SyntaxShape::Record,
+                    SyntaxShape::Record(vec![]),
                    SyntaxShape::Closure(None),
                    SyntaxShape::Block,
                    SyntaxShape::Int,
--- a/crates/nu-parser/src/type_check.rs
+++ b/crates/nu-parser/src/type_check.rs
@ -5,6 +5,21 @@ use nu_protocol::{
 };

 pub fn type_compatible(lhs: &Type, rhs: &Type) -> bool {
+    // Structural subtyping
+    let is_compatible = |expected: &[(String, Type)], found: &[(String, Type)]| {
+        // the expected type is `any`
+        if expected.is_empty() {
+            true
+        } else if expected.len() != found.len() {
+            false
+        } else {
+            expected
+                .iter()
+                .zip(found.iter())
+                .all(|(lhs, rhs)| lhs.0 == rhs.0 && type_compatible(&lhs.1, &rhs.1))
+        }
+    };
+
    match (lhs, rhs) {
        (Type::List(c), Type::List(d)) => type_compatible(c, d),
        (Type::Number, Type::Int) => true,
@ -13,20 +28,7 @@ pub fn type_compatible(lhs: &Type, rhs: &Type) -> bool {
        (Type::Any, _) => true,
        (_, Type::Any) => true,
        (Type::Record(fields_lhs), Type::Record(fields_rhs)) => {
-            // Structural subtyping
-            'outer: for field_lhs in fields_lhs {
-                for field_rhs in fields_rhs {
-                    if field_lhs.0 == field_rhs.0 {
-                        if type_compatible(&field_lhs.1, &field_rhs.1) {
-                            continue 'outer;
-                        } else {
-                            return false;
-                        }
-                    }
-                }
-                return false;
-            }
-            true
+            is_compatible(fields_lhs, fields_rhs)
        }
        (lhs, rhs) => lhs == rhs,
    }
--- a/crates/nu-protocol/src/syntax_shape.rs
+++ b/crates/nu-protocol/src/syntax_shape.rs
@ -95,7 +95,7 @@ pub enum SyntaxShape {
    Range,

    /// A record value, eg `{x: 1, y: 2}`
-    Record,
+    Record(Vec<(String, SyntaxShape)>),

    /// A math expression which expands shorthand forms on the lefthand side, eg `foo > 1`
    /// The shorthand allows us to more easily reach columns inside of the row being passed in
@ -151,7 +151,13 @@ impl SyntaxShape {
            SyntaxShape::OneOf(_) => Type::Any,
            SyntaxShape::Operator => Type::Any,
            SyntaxShape::Range => Type::Any,
-            SyntaxShape::Record => Type::Record(vec![]), // FIXME: What role should fields play in the Record type?
+            SyntaxShape::Record(entries) => {
+                let ty = entries
+                    .iter()
+                    .map(|(key, val)| (key.clone(), val.to_type()))
+                    .collect();
+                Type::Record(ty)
+            }
            SyntaxShape::RowCondition => Type::Bool,
            SyntaxShape::Boolean => Type::Bool,
            SyntaxShape::Signature => Type::Signature,
@ -194,7 +200,21 @@ impl Display for SyntaxShape {
            SyntaxShape::Binary => write!(f, "binary"),
            SyntaxShape::Table => write!(f, "table"),
            SyntaxShape::List(x) => write!(f, "list<{x}>"),
-            SyntaxShape::Record => write!(f, "record"),
+            SyntaxShape::Record(entries) => {
+                if entries.is_empty() {
+                    write!(f, "record")
+                } else {
+                    write!(
+                        f,
+                        "record<{}>",
+                        entries
+                            .iter()
+                            .map(|(x, y)| format!("{x}: {y}"))
+                            .collect::<Vec<String>>()
+                            .join(", "),
+                    )
+                }
+            }
            SyntaxShape::Filesize => write!(f, "filesize"),
            SyntaxShape::Duration => write!(f, "duration"),
            SyntaxShape::DateTime => write!(f, "datetime"),
--- a/crates/nu-protocol/src/ty.rs
+++ b/crates/nu-protocol/src/ty.rs
@ -35,25 +35,28 @@ pub enum Type {

 impl Type {
    pub fn is_subtype(&self, other: &Type) -> bool {
+        // Structural subtyping
+        let is_subtype_collection = |this: &[(String, Type)], that: &[(String, Type)]| {
+            if this.is_empty() || that.is_empty() {
+                true
+            } else if this.len() != that.len() {
+                false
+            } else {
+                this.iter()
+                    .zip(that.iter())
+                    .all(|(lhs, rhs)| lhs.0 == rhs.0 && lhs.1.is_subtype(&rhs.1))
+            }
+        };
+
        match (self, other) {
            (t, u) if t == u => true,
            (Type::Float, Type::Number) => true,
            (Type::Int, Type::Number) => true,
            (_, Type::Any) => true,
            (Type::List(t), Type::List(u)) if t.is_subtype(u) => true, // List is covariant
-
-            // TODO: Currently Record types specify their field types. If we are
-            // going to continue to do that, then it might make sense to define
-            // a "structural subtyping" whereby r1 is a subtype of r2 is the
-            // fields of r1 are a "subset" of the fields of r2 (names are a
-            // subset and agree on types). However, if we do that, then we need
-            // a way to specify the supertype of all Records. For now, we define
-            // any Record to be a subtype of any other Record. This allows
-            // Record(vec![]) to be used as an ad-hoc supertype of all Records
-            // in command signatures. This comment applies to Tables also, with
-            // "columns" in place of "fields".
-            (Type::Record(_), Type::Record(_)) => true,
-            (Type::Table(_), Type::Table(_)) => true,
+            (Type::Record(this), Type::Record(that)) | (Type::Table(this), Type::Table(that)) => {
+                is_subtype_collection(this, that)
+            }
            _ => false,
        }
    }
@ -87,7 +90,13 @@ impl Type {
            Type::List(x) => SyntaxShape::List(Box::new(x.to_shape())),
            Type::Number => SyntaxShape::Number,
            Type::Nothing => SyntaxShape::Nothing,
-            Type::Record(_) => SyntaxShape::Record,
+            Type::Record(entries) => {
+                let entries = entries
+                    .iter()
+                    .map(|(key, val)| (key.clone(), val.to_shape()))
+                    .collect();
+                SyntaxShape::Record(entries)
+            }
            Type::Table(_) => SyntaxShape::Table,
            Type::ListStream => SyntaxShape::List(Box::new(SyntaxShape::Any)),
            Type::Any => SyntaxShape::Any,
--- a/src/tests/test_engine.rs
+++ b/src/tests/test_engine.rs
@ -318,7 +318,7 @@ fn default_value11() -> TestResult {
 fn default_value12() -> TestResult {
    fail_test(
        r#"def foo [--x:int = "a"] { $x }"#,
-        "default value should be int",
+        "expected default value to be `int`",
    )
 }

--- a/src/tests/test_signatures.rs
+++ b/src/tests/test_signatures.rs
@ -132,3 +132,133 @@ fn list_annotations_with_extra_characters() -> TestResult {
    let expected = "Extra characters in the parameter name";
    fail_test(input, expected)
 }
+
+#[test]
+fn record_annotations_none() -> TestResult {
+    let input = "def run [rec: record] { $rec }; run {} | describe";
+    let expected = "record";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations() -> TestResult {
+    let input = "def run [rec: record<age: int>] { $rec }; run {age: 3} | describe";
+    let expected = "record<age: int>";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations_two_types() -> TestResult {
+    let input = "def run [rec: record<name: string age: int>] { $rec }; run {name: nushell age: 3} | describe";
+    let expected = "record<name: string, age: int>";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations_two_types_comma_sep() -> TestResult {
+    let input = "def run [rec: record<name: string, age: int>] { $rec }; run {name: nushell age: 3} | describe";
+    let expected = "record<name: string, age: int>";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations_key_with_no_type() -> TestResult {
+    let input = "def run [rec: record<name>] { $rec }; run {name: nushell} | describe";
+    let expected = "record<name: string>";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations_two_types_one_with_no_type() -> TestResult {
+    let input =
+        "def run [rec: record<name: string, age>] { $rec }; run {name: nushell age: 3} | describe";
+    let expected = "record<name: string, age: int>";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations_two_types_both_with_no_types() -> TestResult {
+    let input = "def run [rec: record<name age>] { $rec }; run {name: nushell age: 3} | describe";
+    let expected = "record<name: string, age: int>";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations_nested() -> TestResult {
+    let input = "def run [
+        err: record<
+            msg: string,
+            label: record<
+               text: string
+               start: int,
+               end: int,
+            >>
+    ] {
+        $err 
+    }; run {
+        msg: 'error message'
+        label: {
+            text: 'here is the error'
+            start: 0
+            end: 69
+        }
+    } | describe";
+    let expected = "record<msg: string, label: record<text: string, start: int, end: int>>";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations_type_inference_1() -> TestResult {
+    let input = "def run [rec: record<age: any>] { $rec }; run {age: 2wk} | describe";
+    let expected = "record<age: duration>";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations_type_inference_2() -> TestResult {
+    let input = "def run [rec: record<size>] { $rec }; run {size: 2mb} | describe";
+    let expected = "record<size: filesize>";
+    run_test(input, expected)
+}
+
+#[test]
+fn record_annotations_not_terminated() -> TestResult {
+    let input = "def run [rec: record<age: int] { $rec }";
+    let expected = "expected closing >";
+    fail_test(input, expected)
+}
+
+#[test]
+fn record_annotations_not_terminated_inner() -> TestResult {
+    let input = "def run [rec: record<name: string, repos: list<string>] { $rec }";
+    let expected = "expected closing >";
+    fail_test(input, expected)
+}
+
+#[test]
+fn record_annotations_no_type_after_colon() -> TestResult {
+    let input = "def run [rec: record<name: >] { $rec }";
+    let expected = "type after colon";
+    fail_test(input, expected)
+}
+
+#[test]
+fn record_annotations_type_mismatch_key() -> TestResult {
+    let input = "def run [rec: record<name: string>] { $rec }; run {nme: nushell}";
+    let expected = "expected record<name: string>, found record<nme: string>";
+    fail_test(input, expected)
+}
+
+#[test]
+fn record_annotations_type_mismatch_shape() -> TestResult {
+    let input = "def run [rec: record<age: int>] { $rec }; run {age: 2wk}";
+    let expected = "expected record<age: int>, found record<age: duration>";
+    fail_test(input, expected)
+}
+
+#[test]
+fn record_annotations_with_extra_characters() -> TestResult {
+    let input = "def run [list: record<int>extra] {$list | length}; run [1 2 3]";
+    let expected = "Extra characters in the parameter name";
+    fail_test(input, expected)
+}