allow records to have type annotations (#8914)

# Description
follow up to #8529
cleaned up version of #8892 

- the original syntax is okay
```nu
def okay [rec: record] {}
```
- you can now add type annotations for fields if you know
  them beforehand
```nu
def okay [rec: record<name: string>] {}
```

- you can specify multiple fields
```nu
def okay [person: record<name: string age: int>] {}

# an optional comma is allowed
def okay [person: record<name: string, age: int>] {}
```

- if annotations are specified, any use of the command will be type
  checked against the specified type
```nu
def unwrap [result: record<ok: bool, value: any>] {}

unwrap {ok: 2, value: "value"}

# errors with

Error: nu::parser::type_mismatch

  × Type mismatch.
   ╭─[entry #4:1:1]
 1 │ unwrap {ok: 2, value: "value"}
   ·         ───────┬─────
   ·                    ╰── expected record<ok: bool, value: any>, found record<ok: int, value: string>
   ╰────
```
> here the error is in the `ok` field (an `int` was given where a `bool`
> is expected); the `value` field is fine, since `any` accepts a value of
> any type
> as a result `unwrap {ok: true, value: "value"}` is okay

- the key must be a string, either quoted or unquoted
```nu
def err [rec: record<{}: list>] {}

# errors with
Error:
  × `record` type annotations key not string
   ╭─[entry #7:1:1]
 1 │ def unwrap [result: record<{}: bool, value: any>] {}
   ·                            ─┬
   ·                             ╰── must be a string
   ╰────
```

- a key doesn't have to have a type in which case it is assumed to be
`any`
```nu
def okay [person: record<name age>] {}

def okay [person: record<name: string age>] {}
```

- however, if you put a colon, you have to specify a type
```nu
def err [person: record<name: >] {}

# errors with
Error: nu::parser::parse_mismatch

  × Parse mismatch during operation.
   ╭─[entry #12:1:1]
 1 │ def unwrap [res: record<name: >] { $res }
   ·                             ┬
   ·                             ╰── expected type after colon
   ╰────
```

# User-Facing Changes
**[BREAKING CHANGES]**
- this change adds a field to `SyntaxShape::Record`, so any plugins that
use it will have to be updated to include the field. If you are
unsure of the type the record expects, `SyntaxShape::Record(vec![])`
will suffice.
This commit is contained in:
mike 2023-04-26 16:16:55 +03:00 committed by GitHub
parent 48c75831fc
commit 77ca73f414
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 347 additions and 93 deletions

View file

@ -16,7 +16,11 @@ impl Command for ErrorMake {
fn signature(&self) -> Signature {
Signature::build("error make")
.input_output_types(vec![(Type::Nothing, Type::Error)])
.required("error_struct", SyntaxShape::Record, "the error to create")
.required(
"error_struct",
SyntaxShape::Record(vec![]),
"the error to create",
)
.switch(
"unspanned",
"remove the origin label from the error",

View file

@ -23,7 +23,7 @@ impl Command for LoadEnv {
.allow_variants_without_examples(true)
.optional(
"update",
SyntaxShape::Record,
SyntaxShape::Record(vec![]),
"the record to use for updates",
)
.category(Category::FileSystem)

View file

@ -2775,7 +2775,7 @@ pub fn parse_shape_name(
b"operator" => SyntaxShape::Operator,
b"path" => SyntaxShape::Filepath,
b"range" => SyntaxShape::Range,
b"record" => SyntaxShape::Record,
_ if bytes.starts_with(b"record") => parse_collection_shape(working_set, bytes, span),
b"signature" => SyntaxShape::Signature,
b"string" => SyntaxShape::String,
b"table" => SyntaxShape::Table,
@ -2814,38 +2814,129 @@ pub fn parse_shape_name(
result
}
/// Parses a `record` type annotation (`record` or `record<key: type, ...>`) into a
/// [`SyntaxShape::Record`].
///
/// `bytes` is the annotation text and `span` is its location in the source; the function
/// panics if `bytes` does not start with `record`.
///
/// Inside the angle brackets, keys may be separated by whitespace or by an optional comma.
/// A key without a type (`record<name>`, `record<name age>`) is given `SyntaxShape::Any`.
/// A colon must be followed by a type, otherwise an `Expected` parse error is reported.
///
/// Errors are pushed onto `working_set`. The returned shape is:
/// - `SyntaxShape::Any` when the text after `record` is not a well-formed `<...>` suffix,
/// - an empty record when lexing fails or a key is not a string,
/// - the fields collected so far when a colon is not followed by a type.
fn parse_collection_shape(
    working_set: &mut StateWorkingSet,
    bytes: &[u8],
    span: Span,
) -> SyntaxShape {
    assert!(bytes.starts_with(b"record"));
    let name = "record";
    let mk_shape = SyntaxShape::Record;

    if bytes == name.as_bytes() {
        return mk_shape(vec![]);
    }

    if !bytes.starts_with(b"record<") {
        working_set.error(ParseError::UnknownType(span));
        return SyntaxShape::Any;
    }

    // 7 == "record<".len()
    let Some(inner_span) = prepare_inner_span(working_set, bytes, span, 7) else {
        return SyntaxShape::Any;
    };

    // `record<>` carries no field information
    if inner_span.end == inner_span.start {
        return mk_shape(vec![]);
    }

    let source = working_set.get_span_contents(inner_span);
    let (tokens, err) = lex_signature(
        source,
        inner_span.start,
        &[b'\n', b'\r'],
        &[b':', b','],
        true,
    );

    if let Some(err) = err {
        working_set.error(err);
        // lexer errors cause issues with span overflows
        return mk_shape(vec![]);
    }

    let key_error = |span| {
        ParseError::LabeledError(
            format!("`{name}` type annotations key not string"),
            "must be a string".into(),
            span,
        )
    };

    let mut sig = vec![];
    let mut idx = 0;

    while idx < tokens.len() {
        let key_span = tokens[idx].span;

        if !matches!(tokens[idx].contents, TokenContents::Item) {
            working_set.error(key_error(key_span));
            return mk_shape(vec![]);
        }

        // a comma between entries is optional and carries no information
        if working_set.get_span_contents(key_span).first() == Some(&b',') {
            idx += 1;
            continue;
        }

        let Some(key) = parse_value(working_set, key_span, &SyntaxShape::String).as_string() else {
            working_set.error(key_error(key_span));
            return mk_shape(vec![]);
        };

        // we want to allow such an annotation
        // `record<name>` where the user leaves out the type
        idx += 1;
        let Some(separator) = tokens.get(idx) else {
            sig.push((key, SyntaxShape::Any));
            break;
        };
        let separator_span = separator.span;

        // Compare in a short-lived scope so the shared borrow of `working_set` ends
        // before errors are reported below; no copy of the token text is needed.
        let (is_colon, is_comma) = {
            let contents = working_set.get_span_contents(separator_span);
            (contents == b":", contents == b",")
        };

        if is_comma {
            // a key provided without a type, followed by a separator
            idx += 1;
            sig.push((key, SyntaxShape::Any));
            continue;
        }

        if !is_colon {
            // a key provided without a type; the current token is the next key,
            // so `idx` is left pointing at it
            sig.push((key, SyntaxShape::Any));
            continue;
        }

        // a colon must be followed by a type
        idx += 1;
        let Some(shape_token) = tokens.get(idx) else {
            working_set.error(ParseError::Expected(
                "type after colon".into(),
                separator_span,
            ));
            break;
        };
        let shape_span = shape_token.span;

        // this copy is required: `parse_shape_name` needs `working_set` mutably
        // while reading the shape text
        let shape_bytes = working_set.get_span_contents(shape_span).to_vec();
        let shape = parse_shape_name(working_set, &shape_bytes, shape_span);
        sig.push((key, shape));
        idx += 1;
    }

    mk_shape(sig)
}
fn parse_list_shape(working_set: &mut StateWorkingSet, bytes: &[u8], span: Span) -> SyntaxShape {
assert!(bytes.starts_with(b"list"));
if bytes == b"list" {
SyntaxShape::List(Box::new(SyntaxShape::Any))
} else if bytes.starts_with(b"list<") {
let start = span.start + 5;
// if the annotation is unterminated, we want to return early to avoid
// overflows with spans
let end = if bytes.ends_with(b">") {
span.end - 1
// extra characters after the >
} else if bytes.contains(&b'>') {
let angle_start = bytes.split(|it| it == &b'>').collect::<Vec<_>>()[0].len() + 1;
let span = Span::new(span.start + angle_start, span.end);
working_set.error(ParseError::LabeledError(
"Extra characters in the parameter name".into(),
"extra characters".into(),
span,
));
let Some(inner_span) = prepare_inner_span(working_set, bytes, span, 5) else {
return SyntaxShape::Any;
} else {
working_set.error(ParseError::Unclosed(">".into(), span));
return SyntaxShape::List(Box::new(SyntaxShape::Any));
};
let inner_span = Span::new(start, end);
let inner_text = String::from_utf8_lossy(working_set.get_span_contents(inner_span));
// remove any extra whitespace, for example `list< string >` becomes `list<string>`
let inner_bytes = inner_text.trim().as_bytes().to_vec();
@ -2864,6 +2955,34 @@ fn parse_list_shape(working_set: &mut StateWorkingSet, bytes: &[u8], span: Span)
}
}
/// Computes the span of the text between the angle brackets of a `name<...>` annotation.
///
/// `bytes` is the annotation text located at `span`, and `prefix_len` is the length of the
/// `name<` prefix to skip.
///
/// Returns `Some` when `bytes` ends with `>`. Otherwise an error is pushed onto
/// `working_set` and `None` is returned:
/// - a labeled "Extra characters" error covering everything after the first `>`, or
/// - an `Unclosed` error when there is no `>` at all.
fn prepare_inner_span(
    working_set: &mut StateWorkingSet,
    bytes: &[u8],
    span: Span,
    prefix_len: usize,
) -> Option<Span> {
    if bytes.ends_with(b">") {
        // drop the `name<` prefix and the trailing `>`
        return Some(Span::new(span.start + prefix_len, span.end - 1));
    }

    let error = match bytes.iter().position(|&byte| byte == b'>') {
        // something follows the closing bracket, e.g. `record<int>extra`
        Some(closing) => ParseError::LabeledError(
            "Extra characters in the parameter name".into(),
            "extra characters".into(),
            Span::new(span.start + closing + 1, span.end),
        ),
        None => ParseError::Unclosed(">".into(), span),
    };

    working_set.error(error);
    None
}
pub fn parse_type(_working_set: &StateWorkingSet, bytes: &[u8]) -> Type {
match bytes {
b"binary" => Type::Binary,
@ -3602,43 +3721,13 @@ pub fn parse_signature_helper(working_set: &mut StateWorkingSet, span: Span) ->
expression.ty.clone(),
);
}
Type::List(param_ty) => {
if let Type::List(expr_ty) = &expression.ty {
if param_ty == expr_ty
|| **param_ty == Type::Any
{
working_set.set_variable_type(
var_id,
expression.ty.clone(),
);
} else {
working_set.error(ParseError::AssignmentMismatch(
"Default value wrong type"
.into(),
format!(
"expected default value to be `{var_type}`",
),
expression.span,
),
)
}
} else {
working_set.error(ParseError::AssignmentMismatch(
"Default value wrong type".into(),
format!(
"expected default value to be `{var_type}`",
),
expression.span,
))
}
}
t => {
if t != &expression.ty {
_ => {
if !type_compatible(var_type, &expression.ty) {
working_set.error(
ParseError::AssignmentMismatch(
"Default value wrong type".into(),
format!(
"expected default value to be `{t}`"
"expected default value to be `{var_type}`"
),
expression.span,
),
@ -3686,7 +3775,7 @@ pub fn parse_signature_helper(working_set: &mut StateWorkingSet, span: Span) ->
"Default value is the wrong type"
.into(),
format!(
"default value should be {t}"
"expected default value to be `{t}`"
),
expression_span,
),
@ -4485,7 +4574,7 @@ pub fn parse_value(
// Be sure to return ParseError::Expected(..) if invoked for one of these shapes, but lex
// stream doesn't start with '{'} -- parsing in SyntaxShape::Any arm depends on this error variant.
SyntaxShape::Block | SyntaxShape::Closure(..) | SyntaxShape::Record => {
SyntaxShape::Block | SyntaxShape::Closure(..) | SyntaxShape::Record(_) => {
working_set.error(ParseError::Expected(
"block, closure or record".into(),
span,
@ -4505,7 +4594,7 @@ pub fn parse_value(
SyntaxShape::Duration,
SyntaxShape::Range,
SyntaxShape::DateTime, //FIXME requires 3 failed conversion attempts before failing
SyntaxShape::Record,
SyntaxShape::Record(vec![]),
SyntaxShape::Closure(None),
SyntaxShape::Block,
SyntaxShape::Int,

View file

@ -5,6 +5,21 @@ use nu_protocol::{
};
pub fn type_compatible(lhs: &Type, rhs: &Type) -> bool {
// Structural subtyping
let is_compatible = |expected: &[(String, Type)], found: &[(String, Type)]| {
// the expected type is `any`
if expected.is_empty() {
true
} else if expected.len() != found.len() {
false
} else {
expected
.iter()
.zip(found.iter())
.all(|(lhs, rhs)| lhs.0 == rhs.0 && type_compatible(&lhs.1, &rhs.1))
}
};
match (lhs, rhs) {
(Type::List(c), Type::List(d)) => type_compatible(c, d),
(Type::Number, Type::Int) => true,
@ -13,20 +28,7 @@ pub fn type_compatible(lhs: &Type, rhs: &Type) -> bool {
(Type::Any, _) => true,
(_, Type::Any) => true,
(Type::Record(fields_lhs), Type::Record(fields_rhs)) => {
// Structural subtyping
'outer: for field_lhs in fields_lhs {
for field_rhs in fields_rhs {
if field_lhs.0 == field_rhs.0 {
if type_compatible(&field_lhs.1, &field_rhs.1) {
continue 'outer;
} else {
return false;
}
}
}
return false;
}
true
is_compatible(fields_lhs, fields_rhs)
}
(lhs, rhs) => lhs == rhs,
}

View file

@ -95,7 +95,7 @@ pub enum SyntaxShape {
Range,
/// A record value, eg `{x: 1, y: 2}`
Record,
Record(Vec<(String, SyntaxShape)>),
/// A math expression which expands shorthand forms on the lefthand side, eg `foo > 1`
/// The shorthand allows us to more easily reach columns inside of the row being passed in
@ -151,7 +151,13 @@ impl SyntaxShape {
SyntaxShape::OneOf(_) => Type::Any,
SyntaxShape::Operator => Type::Any,
SyntaxShape::Range => Type::Any,
SyntaxShape::Record => Type::Record(vec![]), // FIXME: What role should fields play in the Record type?
SyntaxShape::Record(entries) => {
let ty = entries
.iter()
.map(|(key, val)| (key.clone(), val.to_type()))
.collect();
Type::Record(ty)
}
SyntaxShape::RowCondition => Type::Bool,
SyntaxShape::Boolean => Type::Bool,
SyntaxShape::Signature => Type::Signature,
@ -194,7 +200,21 @@ impl Display for SyntaxShape {
SyntaxShape::Binary => write!(f, "binary"),
SyntaxShape::Table => write!(f, "table"),
SyntaxShape::List(x) => write!(f, "list<{x}>"),
SyntaxShape::Record => write!(f, "record"),
SyntaxShape::Record(entries) => {
if entries.is_empty() {
write!(f, "record")
} else {
write!(
f,
"record<{}>",
entries
.iter()
.map(|(x, y)| format!("{x}: {y}"))
.collect::<Vec<String>>()
.join(", "),
)
}
}
SyntaxShape::Filesize => write!(f, "filesize"),
SyntaxShape::Duration => write!(f, "duration"),
SyntaxShape::DateTime => write!(f, "datetime"),

View file

@ -35,25 +35,28 @@ pub enum Type {
impl Type {
pub fn is_subtype(&self, other: &Type) -> bool {
// Structural subtyping
let is_subtype_collection = |this: &[(String, Type)], that: &[(String, Type)]| {
if this.is_empty() || that.is_empty() {
true
} else if this.len() != that.len() {
false
} else {
this.iter()
.zip(that.iter())
.all(|(lhs, rhs)| lhs.0 == rhs.0 && lhs.1.is_subtype(&rhs.1))
}
};
match (self, other) {
(t, u) if t == u => true,
(Type::Float, Type::Number) => true,
(Type::Int, Type::Number) => true,
(_, Type::Any) => true,
(Type::List(t), Type::List(u)) if t.is_subtype(u) => true, // List is covariant
// TODO: Currently Record types specify their field types. If we are
// going to continue to do that, then it might make sense to define
// a "structural subtyping" whereby r1 is a subtype of r2 is the
// fields of r1 are a "subset" of the fields of r2 (names are a
// subset and agree on types). However, if we do that, then we need
// a way to specify the supertype of all Records. For now, we define
// any Record to be a subtype of any other Record. This allows
// Record(vec![]) to be used as an ad-hoc supertype of all Records
// in command signatures. This comment applies to Tables also, with
// "columns" in place of "fields".
(Type::Record(_), Type::Record(_)) => true,
(Type::Table(_), Type::Table(_)) => true,
(Type::Record(this), Type::Record(that)) | (Type::Table(this), Type::Table(that)) => {
is_subtype_collection(this, that)
}
_ => false,
}
}
@ -87,7 +90,13 @@ impl Type {
Type::List(x) => SyntaxShape::List(Box::new(x.to_shape())),
Type::Number => SyntaxShape::Number,
Type::Nothing => SyntaxShape::Nothing,
Type::Record(_) => SyntaxShape::Record,
Type::Record(entries) => {
let entries = entries
.iter()
.map(|(key, val)| (key.clone(), val.to_shape()))
.collect();
SyntaxShape::Record(entries)
}
Type::Table(_) => SyntaxShape::Table,
Type::ListStream => SyntaxShape::List(Box::new(SyntaxShape::Any)),
Type::Any => SyntaxShape::Any,

View file

@ -318,7 +318,7 @@ fn default_value11() -> TestResult {
fn default_value12() -> TestResult {
fail_test(
r#"def foo [--x:int = "a"] { $x }"#,
"default value should be int",
"expected default value to be `int`",
)
}

View file

@ -132,3 +132,133 @@ fn list_annotations_with_extra_characters() -> TestResult {
let expected = "Extra characters in the parameter name";
fail_test(input, expected)
}
/// A bare `record` annotation accepts an empty record; `describe` is expected to print
/// `record`.
#[test]
fn record_annotations_none() -> TestResult {
    run_test(
        "def run [rec: record] { $rec }; run {} | describe",
        "record",
    )
}
/// A single annotated field: a matching record is accepted and described with its field type.
#[test]
fn record_annotations() -> TestResult {
    run_test(
        "def run [rec: record<age: int>] { $rec }; run {age: 3} | describe",
        "record<age: int>",
    )
}
/// Two annotated fields separated only by whitespace.
#[test]
fn record_annotations_two_types() -> TestResult {
    run_test(
        "def run [rec: record<name: string age: int>] { $rec }; run {name: nushell age: 3} | describe",
        "record<name: string, age: int>",
    )
}
/// Two annotated fields separated by the optional comma.
#[test]
fn record_annotations_two_types_comma_sep() -> TestResult {
    run_test(
        "def run [rec: record<name: string, age: int>] { $rec }; run {name: nushell age: 3} | describe",
        "record<name: string, age: int>",
    )
}
/// A key given without a type is accepted; the described type comes from the passed value.
#[test]
fn record_annotations_key_with_no_type() -> TestResult {
    run_test(
        "def run [rec: record<name>] { $rec }; run {name: nushell} | describe",
        "record<name: string>",
    )
}
/// One typed key followed by a key without a type.
#[test]
fn record_annotations_two_types_one_with_no_type() -> TestResult {
    run_test(
        "def run [rec: record<name: string, age>] { $rec }; run {name: nushell age: 3} | describe",
        "record<name: string, age: int>",
    )
}
/// Two keys, neither of which has a type.
#[test]
fn record_annotations_two_types_both_with_no_types() -> TestResult {
    run_test(
        "def run [rec: record<name age>] { $rec }; run {name: nushell age: 3} | describe",
        "record<name: string, age: int>",
    )
}
/// A record annotation nested inside another one, spread over several lines and mixing
/// comma-separated and whitespace-separated fields.
#[test]
fn record_annotations_nested() -> TestResult {
    let script = "def run [
err: record<
msg: string,
label: record<
text: string
start: int,
end: int,
>>
] {
$err
}; run {
msg: 'error message'
label: {
text: 'here is the error'
start: 0
end: 69
}
} | describe";

    run_test(
        script,
        "record<msg: string, label: record<text: string, start: int, end: int>>",
    )
}
/// A field annotated as `any` accepts a duration; the described type is the value's type.
#[test]
fn record_annotations_type_inference_1() -> TestResult {
    run_test(
        "def run [rec: record<age: any>] { $rec }; run {age: 2wk} | describe",
        "record<age: duration>",
    )
}
/// A key without a type accepts a filesize; the described type is the value's type.
#[test]
fn record_annotations_type_inference_2() -> TestResult {
    run_test(
        "def run [rec: record<size>] { $rec }; run {size: 2mb} | describe",
        "record<size: filesize>",
    )
}
/// A missing closing `>` on the record annotation is reported as an error.
#[test]
fn record_annotations_not_terminated() -> TestResult {
    fail_test(
        "def run [rec: record<age: int] { $rec }",
        "expected closing >",
    )
}
/// The inner `list<string>` is closed but the enclosing `record<` is not; an unclosed `>`
/// error is still expected.
#[test]
fn record_annotations_not_terminated_inner() -> TestResult {
    fail_test(
        "def run [rec: record<name: string, repos: list<string>] { $rec }",
        "expected closing >",
    )
}
/// A colon with no type after it is rejected.
#[test]
fn record_annotations_no_type_after_colon() -> TestResult {
    fail_test(
        "def run [rec: record<name: >] { $rec }",
        "type after colon",
    )
}
/// Passing a record whose key differs from the annotated key is a type mismatch.
#[test]
fn record_annotations_type_mismatch_key() -> TestResult {
    fail_test(
        "def run [rec: record<name: string>] { $rec }; run {nme: nushell}",
        "expected record<name: string>, found record<nme: string>",
    )
}
/// Passing a record whose field value has the wrong type is a type mismatch.
#[test]
fn record_annotations_type_mismatch_shape() -> TestResult {
    fail_test(
        "def run [rec: record<age: int>] { $rec }; run {age: 2wk}",
        "expected record<age: int>, found record<age: duration>",
    )
}
/// Text after the closing `>` of a record annotation is rejected.
// NOTE(review): the script looks adapted from the list-annotation test (parameter named
// `list`, invoked with a list literal); only the annotation itself matters here.
#[test]
fn record_annotations_with_extra_characters() -> TestResult {
    fail_test(
        "def run [list: record<int>extra] {$list | length}; run [1 2 3]",
        "Extra characters in the parameter name",
    )
}