Fix silent failure of parsing input output types (#14510)

- This PR should fix/close: - #11266 - #12893 - #13736 - #13748 - #14170 - It doesn't fix #13736 though unfortunately. The issue there is at a different level to this fix (I think probably in the lexing somewhere, which I haven't touched). # The Problem The linked issues have many examples of the problem and the related confusion it causes, but I'll give some more examples here for illustration. It boils down to the following: This doesn't type check (good): ```nu def foo []: string -> int { false } ``` This does (bad): ```nu def foo [] : string -> int { false } ``` Because the parser is completely ignoring all the characters. This also compiles in 0.100.0: ```nu def blue [] Da ba Dee da Ba da { false } ``` And this also means commands which have a completely fine type, but an extra space before `:`, lose that type information and end up as `any -> any`, e.g. ```nu def foo [] : int -> int {$in + 3} ``` ```bash $ foo --help Input/output types: ╭───┬───────┬────────╮ │ # │ input │ output │ ├───┼───────┼────────┤ │ 0 │ any │ any │ ╰───┴───────┴────────╯ ``` # The Fix Special thank you to @texastoland whose draft PR (#12358) I referenced heavily while making this fix. That PR seeks to fix the invalid parsing by disallowing whitespace between `[]` and `:` in declarations, e.g. `def foo [] : int -> any {}` This PR instead allows the whitespace while properly parsing the type signature. I think this is the better choice for a few reasons: - The parsing is still straightforward and the information is all there anyway, - It's more consistent with type annotations in other places, e.g. `do {|nums : list<int>| $nums | describe} [ 1 2 3 ]` from the [Type Signatures doc page](https://www.nushell.sh/lang-guide/chapters/types/type_signatures.html) - It's more consistent with the new nu parser, which allows `let x : bool = false` (current nu doesn't, but this PR doesn't change that) - It will be less disruptive and should only break code where the types are actually wrong (if your types were correct, but you had a space before the `:`, those declarations will still compile and now have more type information vs. throwing an error in all cases and requiring spaces to be deleted) - It's the more intuitive syntax for most functional programmers like myself (haskell/lean/coq/agda and many more either allow or require whitespace for type annotations) I don't use Rust a lot, so I tried to keep most things the same and the rest I wrote as if it was Haskell (if you squint a bit). Code review/suggestions very welcome. I added all the tests I could think of and `toolkit check pr` gives it the all-clear. # User-Facing Changes This PR meets part of the goal of #13849, but doesn't do anything about parsing signatures twice and doesn't do much to improve error messages, it just enforces the existing errors and error messages. This will no doubt be a breaking change, mostly because the code is already broken and users don't realise yet (one of my personal scripts stopped compiling after this fix because I thought `def foo [] -> string {}` was valid syntax). It shouldn't break any type-correct code though.
2025-01-13 05:38:57 +00:00 · 2024-12-07 15:55:15 +00:00 · 2024-12-07 15:55:15 +00:00 · 9daa5f9177
commit 9daa5f9177
parent 69fbfb939f
3 changed files with 99 additions and 22 deletions
--- a/crates/nu-parser/src/parser.rs
+++ b/crates/nu-parser/src/parser.rs
@ -3361,26 +3361,66 @@ pub fn parse_input_output_types(
 }

 pub fn parse_full_signature(working_set: &mut StateWorkingSet, spans: &[Span]) -> Expression {
-    let arg_signature = working_set.get_span_contents(spans[0]);
-
-    if arg_signature.ends_with(b":") {
-        let mut arg_signature =
-            parse_signature(working_set, Span::new(spans[0].start, spans[0].end - 1));
-
-        let input_output_types = parse_input_output_types(working_set, &spans[1..]);
-
-        if let Expression {
-            expr: Expr::Signature(sig),
-            span: expr_span,
-            ..
-        } = &mut arg_signature
-        {
-            sig.input_output_types = input_output_types;
-            expr_span.end = Span::concat(&spans[1..]).end;
+    match spans.len() {
+        // This case should never happen. It corresponds to declarations like `def foo {}`,
+        // which should throw a 'Missing required positional argument.' before getting to this point
+        0 => {
+            working_set.error(ParseError::InternalError(
+                "failed to catch missing positional arguments".to_string(),
+                Span::concat(spans),
+            ));
+            garbage(working_set, Span::concat(spans))
+        }
+
+        // e.g. `[ b"[foo: string]" ]`
+        1 => parse_signature(working_set, spans[0]),
+
+        // This case is needed to distinguish between e.g.
+        // `[ b"[]", b"{ true }" ]` vs `[ b"[]:", b"int" ]`
+        2 if working_set.get_span_contents(spans[1]).starts_with(b"{") => {
+            parse_signature(working_set, spans[0])
+        }
+
+        // This should handle every other case, e.g.
+        // `[ b"[]:", b"int" ]`
+        // `[ b"[]", b":", b"int" ]`
+        // `[ b"[]", b":", b"int", b"->", b"bool" ]`
+        _ => {
+            let (mut arg_signature, input_output_types_pos) =
+                if working_set.get_span_contents(spans[0]).ends_with(b":") {
+                    (
+                        parse_signature(working_set, Span::new(spans[0].start, spans[0].end - 1)),
+                        1,
+                    )
+                } else if working_set.get_span_contents(spans[1]) == b":" {
+                    (parse_signature(working_set, spans[0]), 2)
+                } else {
+                    // This should be an error case, but we call parse_signature anyway
+                    // so it can handle the various possible errors
+                    // e.g. `[ b"[]", b"int" ]` or `[
+                    working_set.error(ParseError::Expected(
+                        "colon (:) before type signature",
+                        Span::concat(&spans[1..]),
+                    ));
+                    // (garbage(working_set, Span::concat(spans)), 1)
+
+                    (parse_signature(working_set, spans[0]), 1)
+                };
+
+            let input_output_types =
+                parse_input_output_types(working_set, &spans[input_output_types_pos..]);
+
+            if let Expression {
+                expr: Expr::Signature(sig),
+                span: expr_span,
+                ..
+            } = &mut arg_signature
+            {
+                sig.input_output_types = input_output_types;
+                expr_span.end = Span::concat(&spans[input_output_types_pos..]).end;
+            }
+            arg_signature
        }
-        arg_signature
-    } else {
-        parse_signature(working_set, spans[0])
    }
 }

--- a/crates/nu-parser/tests/test_parser.rs
+++ b/crates/nu-parser/tests/test_parser.rs
@ -2460,6 +2460,7 @@ mod input_types {

    #[rstest]
    #[case::input_output(b"def q []: int -> int {1}", false)]
+    #[case::input_output(b"def q [x: bool]: int -> int {2}", false)]
    #[case::input_output(b"def q []: string -> string {'qwe'}", false)]
    #[case::input_output(b"def q []: nothing -> nothing {null}", false)]
    #[case::input_output(b"def q []: list<string> -> list<string> {[]}", false)]
@ -2479,6 +2480,42 @@ mod input_types {
    #[case::input_output(b"def q []: nothing -> record<c: int e: int {{c: 1 e: 1}}", true)]
    #[case::input_output(b"def q []: record<c: int e: int -> record<a: int> {{a: 1}}", true)]
    #[case::input_output(b"def q []: nothing -> record<a: record<a: int> {{a: {a: 1}}}", true)]
+    #[case::input_output(b"def q []: int []}", true)]
+    #[case::input_output(b"def q []: bool {[]", true)]
+    // Type signature variants with whitespace between inputs and `:`
+    #[case::input_output(b"def q [] : int -> int {1}", false)]
+    #[case::input_output(b"def q [x: bool] : int -> int {2}", false)]
+    #[case::input_output(b"def q []\t   : string -> string {'qwe'}", false)]
+    #[case::input_output(b"def q []  \t : nothing -> nothing {null}", false)]
+    #[case::input_output(b"def q [] \t: list<string> -> list<string> {[]}", false)]
+    #[case::input_output(
+        b"def q []\t: record<a: int b: int> -> record<c: int e: int> {{c: 1 e: 1}}",
+        false
+    )]
+    #[case::input_output(
+        b"def q [] : table<a: int b: int> -> table<c: int e: int> {[{c: 1 e: 1}]}",
+        false
+    )]
+    #[case::input_output(
+        b"def q [] : nothing -> record<c: record<a: int b: int> e: int> {{c: {a: 1 b: 2} e: 1}}",
+        false
+    )]
+    #[case::input_output(b"def q [] : nothing -> list<string {[]}", true)]
+    #[case::input_output(b"def q [] : nothing -> record<c: int e: int {{c: 1 e: 1}}", true)]
+    #[case::input_output(b"def q [] : record<c: int e: int -> record<a: int> {{a: 1}}", true)]
+    #[case::input_output(b"def q [] : nothing -> record<a: record<a: int> {{a: {a: 1}}}", true)]
+    #[case::input_output(b"def q [] : int []}", true)]
+    #[case::input_output(b"def q [] : bool {[]", true)]
+    // No input-output type signature
+    #[case::input_output(b"def qq [] {[]}", false)]
+    #[case::input_output(b"def q [] []}", true)]
+    #[case::input_output(b"def q [] {", true)]
+    #[case::input_output(b"def q []: []}", true)]
+    #[case::input_output(b"def q [] int {}", true)]
+    #[case::input_output(b"def q [x: string, y: int] {{c: 1 e: 1}}", false)]
+    #[case::input_output(b"def q [x: string, y: int]: {}", true)]
+    #[case::input_output(b"def q [x: string, y: int] {a: {a: 1}}", true)]
+    #[case::input_output(b"def foo {3}", true)]
    #[case::vardecl(b"let a: int = 1", false)]
    #[case::vardecl(b"let a: string = 'qwe'", false)]
    #[case::vardecl(b"let a: nothing = null", false)]
--- a/crates/nu-std/testing.nu
+++ b/crates/nu-std/testing.nu
@ -28,7 +28,7 @@ def valid-annotations [] {
 # Returns a table containing the list of function names together with their annotations (comments above the declaration)
 def get-annotated [
    file: path
-] path -> table<function_name: string, annotation: string> {
+]: path -> table<function_name: string, annotation: string> {
    let raw_file = (
        open $file
        | lines
@ -59,7 +59,7 @@ def get-annotated [
 # Annotations that allow multiple functions are of type list<string>
 # Other annotations are of type string
 # Result gets merged with the template record so that the output shape remains consistent regardless of the table content
-def create-test-record [] nothing -> record<before-each: string, after-each: string, before-all: string, after-all: string, test: list<string>, test-skip: list<string>> {
+def create-test-record []: nothing -> record<before-each: string, after-each: string, before-all: string, after-all: string, test: list<string>, test-skip: list<string>> {
    let input = $in

    let template_record = {
@ -187,7 +187,7 @@ export def ($test_function_name) [] {
 def run-tests-for-module [
    module: record<file: path name: string before-each: string after-each: string before-all: string after-all: string test: list test-skip: list>
    threads: int
-] -> table<file: path, name: string, test: string, result: string> {
+]: nothing -> table<file: path, name: string, test: string, result: string> {
    let global_context = if not ($module.before-all|is-empty) {
            log info $"Running before-all for module ($module.name)"
            run-test {