Parse time type checking for range (#13595)

# Description

As part of fixing https://github.com/nushell/nushell/issues/13586, this
PR checks the types of the operands when creating a range. Stuff like
`0..(glob .)` will be rejected at parse time. Additionally, `0..$x` will
be treated as a range and rejected if `x` is not defined, rather than
being treated as a string. A separate PR will need to be made to do
reject streams at runtime, so that stuff like `0..(open /dev/random)`
doesn't hang.

Internally, this PR adds a `ParseError::UnsupportedOperationTernary`
variant, for when you have a range like `1..2..(glob .)`.

# User-Facing Changes

Users will now receive an error if any of the operands in the ranges
they construct have types that aren't compatible with `Type::Number`.

Additionally, if a piece of code looks like a range but some parse error
is encountered while parsing it, that piece of code will still be
treated as a range and the user will be shown the parse error. This
means that a piece of code like `0..$x` will be treated as a range no
matter what. Previously, if `x` weren't the expression would've been
treated as a string `"0..$x"`. I feel like it makes the language less
complicated if we make it less context-sensitive.

Here's an example of the error you get:
```
> 0..(glob .)
Error: nu::parser::unsupported_operation

  × range is not supported between int and any.
   ╭─[entry #1:1:1]
 1 │ 0..(glob .)
   · ─────┬─────┬┬
   ·      │     │╰── any
   ·      │     ╰── int
   ·      ╰── doesn't support these values
   ╰────
```

And as an image:

![image](https://github.com/user-attachments/assets/5c76168d-27db-481b-b541-861dac899dbf)

Note: I made the operands themselves (above, `(glob .)`) be garbage,
rather than the `..` operator itself. This doesn't match the behavior of
the math operators (if you do `1 + "foo"`, `+` gets highlighted red).
This is because with ranges, the range operators aren't `Expression`s
themselves, so they can't be turned into garbage. I felt like here, it
makes more sense to highlight the individual operand anyway.
This commit is contained in:
Yash Thakur 2024-08-13 03:05:34 -04:00 committed by GitHub
parent a432bf94ec
commit d5946a9667
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 182 additions and 57 deletions

View file

@ -4,7 +4,7 @@ use crate::{
parse_keywords::*,
parse_patterns::parse_pattern,
parse_shape_specs::{parse_shape_name, parse_type, ShapeDescriptorUse},
type_check::{self, math_result_type, type_compatible},
type_check::{self, check_range_types, math_result_type, type_compatible},
Token, TokenContents,
};
use itertools::Itertools;
@ -109,14 +109,9 @@ pub fn is_math_expression_like(working_set: &mut StateWorkingSet, span: Span) ->
}
working_set.parse_errors.truncate(starting_error_count);
parse_range(working_set, span);
if working_set.parse_errors.len() == starting_error_count {
return true;
}
let is_range = parse_range(working_set, span).is_some();
working_set.parse_errors.truncate(starting_error_count);
false
is_range
}
fn is_identifier(bytes: &[u8]) -> bool {
@ -1389,8 +1384,7 @@ pub fn parse_call(working_set: &mut StateWorkingSet, spans: &[Span], head: Span)
trace!("-- found leading range indicator");
let starting_error_count = working_set.parse_errors.len();
let range_expr = parse_range(working_set, spans[0]);
if working_set.parse_errors.len() == starting_error_count {
if let Some(range_expr) = parse_range(working_set, spans[0]) {
trace!("-- successfully parsed range");
return range_expr;
}
@ -1598,7 +1592,7 @@ pub fn parse_number(working_set: &mut StateWorkingSet, span: Span) -> Expression
garbage(working_set, span)
}
pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression {
pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Option<Expression> {
trace!("parsing: range");
// Range follows the following syntax: [<from>][<next_operator><next>]<range_operator>[<to>]
@ -1614,12 +1608,20 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
s
} else {
working_set.error(ParseError::NonUtf8(span));
return garbage(working_set, span);
return None;
};
if token.starts_with("...") {
working_set.error(ParseError::Expected(
"range operator ('..'), got spread ('...')",
span,
));
return None;
}
if !token.contains("..") {
working_set.error(ParseError::Expected("at least one range bound set", span));
return garbage(working_set, span);
return None;
}
// First, figure out what exact operators are used and determine their positions
@ -1633,7 +1635,7 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
"one range operator ('..' or '..<') and optionally one next operator ('..')",
span,
));
return garbage(working_set, span);
return None;
}
};
// Avoid calling sub-parsers on unmatched parens, to prevent quadratic time on things like ((((1..2))))
@ -1648,7 +1650,7 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
);
if let Some(_err) = err {
working_set.error(ParseError::Expected("Valid expression before ..", span));
return garbage(working_set, span);
return None;
}
}
@ -1665,7 +1667,7 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
"inclusive operator preceding second range bound",
span,
));
return garbage(working_set, span);
return None;
}
} else {
let op_str = if token.contains("..=") { "..=" } else { ".." };
@ -1698,7 +1700,7 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
if let (None, None) = (&from, &to) {
working_set.error(ParseError::Expected("at least one range bound set", span));
return garbage(working_set, span);
return None;
}
let (next, next_op_span) = if let Some(pos) = next_op_pos {
@ -1719,14 +1721,21 @@ pub fn parse_range(working_set: &mut StateWorkingSet, span: Span) -> Expression
next_op_span,
};
let range = Range {
let mut range = Range {
from,
next,
to,
operator,
};
Expression::new(working_set, Expr::Range(Box::new(range)), span, Type::Range)
check_range_types(working_set, &mut range);
Some(Expression::new(
working_set,
Expr::Range(Box::new(range)),
span,
Type::Range,
))
}
pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) -> Expression {
@ -1740,8 +1749,7 @@ pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) -
} else {
let starting_error_count = working_set.parse_errors.len();
let expr = parse_range(working_set, span);
if starting_error_count == working_set.parse_errors.len() {
if let Some(expr) = parse_range(working_set, span) {
expr
} else {
working_set.parse_errors.truncate(starting_error_count);
@ -1812,9 +1820,7 @@ pub fn parse_paren_expr(
) -> Expression {
let starting_error_count = working_set.parse_errors.len();
let expr = parse_range(working_set, span);
if starting_error_count == working_set.parse_errors.len() {
if let Some(expr) = parse_range(working_set, span) {
expr
} else {
working_set.parse_errors.truncate(starting_error_count);
@ -4733,7 +4739,9 @@ pub fn parse_value(
SyntaxShape::Duration => parse_duration(working_set, span),
SyntaxShape::DateTime => parse_datetime(working_set, span),
SyntaxShape::Filesize => parse_filesize(working_set, span),
SyntaxShape::Range => parse_range(working_set, span),
SyntaxShape::Range => {
parse_range(working_set, span).unwrap_or_else(|| garbage(working_set, span))
}
SyntaxShape::Filepath => parse_filepath(working_set, span),
SyntaxShape::Directory => parse_directory(working_set, span),
SyntaxShape::GlobPattern => parse_glob_pattern(working_set, span),

View file

@ -1,6 +1,7 @@
use nu_protocol::{
ast::{
Assignment, Bits, Block, Boolean, Comparison, Expr, Expression, Math, Operator, Pipeline,
Range,
},
engine::StateWorkingSet,
ParseError, Type,
@ -1132,3 +1133,63 @@ fn check_append(
}
}
}
/// If one of the parts of the range isn't a number, a parse error is added to the working set
pub fn check_range_types(working_set: &mut StateWorkingSet, range: &mut Range) {
let next_op_span = if range.next.is_some() {
range.operator.next_op_span
} else {
range.operator.span
};
match (&mut range.from, &mut range.next, &mut range.to) {
(Some(expr), _, _) | (None, Some(expr), Some(_)) | (None, None, Some(expr))
if !type_compatible(&Type::Number, &expr.ty) =>
{
working_set.error(ParseError::UnsupportedOperationLHS(
String::from("range"),
next_op_span,
expr.span,
expr.ty.clone(),
));
*expr = Expression::garbage(working_set, expr.span);
}
(Some(lhs), Some(rhs), _) if !type_compatible(&Type::Number, &rhs.ty) => {
working_set.error(ParseError::UnsupportedOperationRHS(
String::from("range"),
next_op_span,
lhs.span,
lhs.ty.clone(),
rhs.span,
rhs.ty.clone(),
));
*rhs = Expression::garbage(working_set, rhs.span);
}
(Some(lhs), Some(rhs), _) | (Some(lhs), None, Some(rhs)) | (None, Some(lhs), Some(rhs))
if !type_compatible(&Type::Number, &rhs.ty) =>
{
working_set.error(ParseError::UnsupportedOperationRHS(
String::from("range"),
range.operator.span,
lhs.span,
lhs.ty.clone(),
rhs.span,
rhs.ty.clone(),
));
*rhs = Expression::garbage(working_set, rhs.span);
}
(Some(from), Some(next), Some(to)) if !type_compatible(&Type::Number, &to.ty) => {
working_set.error(ParseError::UnsupportedOperationTernary(
String::from("range"),
range.operator.span,
from.span,
from.ty.clone(),
next.span,
next.ty.clone(),
to.span,
to.ty.clone(),
));
*to = Expression::garbage(working_set, to.span);
}
_ => (),
}
}

View file

@ -2,7 +2,7 @@ use nu_parser::*;
use nu_protocol::{
ast::{Argument, Expr, Expression, ExternalArgument, PathMember, Range},
engine::{Call, Command, EngineState, Stack, StateWorkingSet},
ParseError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
Category, ParseError, PipelineData, ShellError, Signature, Span, SyntaxShape, Type,
};
use rstest::rstest;
@ -76,6 +76,35 @@ impl Command for Mut {
}
}
#[derive(Clone)]
pub struct ToCustom;
impl Command for ToCustom {
fn name(&self) -> &str {
"to-custom"
}
fn usage(&self) -> &str {
"Mock converter command."
}
fn signature(&self) -> nu_protocol::Signature {
Signature::build(self.name())
.input_output_type(Type::Any, Type::Custom("custom".into()))
.category(Category::Custom("custom".into()))
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
_call: &Call,
_input: PipelineData,
) -> Result<PipelineData, ShellError> {
todo!()
}
}
fn test_int(
test_tag: &str, // name of sub-test
test: &[u8], // input expression
@ -1799,13 +1828,55 @@ mod range {
}
#[test]
fn vars_not_read_as_units() {
fn vars_read_as_units() {
let engine_state = EngineState::new();
let mut working_set = StateWorkingSet::new(&engine_state);
let _ = parse(&mut working_set, None, b"0..<$day", true);
assert!(working_set.parse_errors.is_empty());
assert!(
working_set.parse_errors.len() == 1,
"Errors: {:?}",
working_set.parse_errors
);
let err = &working_set.parse_errors[0].to_string();
assert!(
err.contains("Variable not found"),
"Expected variable not found error, got {}",
err
);
}
#[rstest]
#[case("(to-custom)..")]
#[case("..(to-custom)")]
#[case("(to-custom)..0")]
#[case("..(to-custom)..0")]
#[case("(to-custom)..0..")]
#[case("(to-custom)..0..1")]
#[case("0..(to-custom)")]
#[case("0..(to-custom)..")]
#[case("0..(to-custom)..1")]
#[case("..1..(to-custom)")]
#[case("0..1..(to-custom)")]
fn type_mismatch_errors(#[case] code: &str) {
let engine_state = EngineState::new();
let mut working_set = StateWorkingSet::new(&engine_state);
working_set.add_decl(Box::new(ToCustom));
let _ = parse(&mut working_set, None, code.as_bytes(), true);
assert!(
working_set.parse_errors.len() == 1,
"Errors: {:?}",
working_set.parse_errors
);
let err = &working_set.parse_errors[0].to_string();
assert!(
err.contains("range is not supported"),
"Expected unsupported operation error, got {}",
err
);
}
}
@ -1902,35 +1973,6 @@ mod input_types {
}
}
#[derive(Clone)]
pub struct ToCustom;
impl Command for ToCustom {
fn name(&self) -> &str {
"to-custom"
}
fn usage(&self) -> &str {
"Mock converter command."
}
fn signature(&self) -> nu_protocol::Signature {
Signature::build(self.name())
.input_output_type(Type::Any, Type::Custom("custom".into()))
.category(Category::Custom("custom".into()))
}
fn run(
&self,
_engine_state: &EngineState,
_stack: &mut Stack,
_call: &Call,
_input: PipelineData,
) -> Result<PipelineData, ShellError> {
todo!()
}
}
#[derive(Clone)]
pub struct GroupByCustom;

View file

@ -131,6 +131,19 @@ pub enum ParseError {
Type,
),
#[error("{0} is not supported between {3}, {5}, and {7}.")]
#[diagnostic(code(nu::parser::unsupported_operation))]
UnsupportedOperationTernary(
String,
#[label = "doesn't support these values"] Span,
#[label("{3}")] Span,
Type,
#[label("{5}")] Span,
Type,
#[label("{7}")] Span,
Type,
),
#[error("Capture of mutable variable.")]
#[diagnostic(code(nu::parser::expected_keyword))]
CaptureOfMutableVar(#[label("capture of mutable variable")] Span),
@ -517,6 +530,7 @@ impl ParseError {
ParseError::Mismatch(_, _, s) => *s,
ParseError::UnsupportedOperationLHS(_, _, s, _) => *s,
ParseError::UnsupportedOperationRHS(_, _, _, _, s, _) => *s,
ParseError::UnsupportedOperationTernary(_, _, _, _, _, _, s, _) => *s,
ParseError::ExpectedKeyword(_, s) => *s,
ParseError::UnexpectedKeyword(_, s) => *s,
ParseError::CantAliasKeyword(_, s) => *s,