From 51eaa01e8d73e1393854b082a814916c6df28860 Mon Sep 17 00:00:00 2001 From: Devyn Cairns Date: Sat, 21 Dec 2024 13:59:58 -0800 Subject: [PATCH] Change how `and` and `or` operations are compiled to IR to support custom values Because `and` and `or` are short-circuiting operations in Nushell, they must be compiled to a sequence that avoids evaluating the RHS if the LHS is already sufficient to determine the output - i.e., `false` for `and` and `true` for `or`. I initially implemented this with `branch-if` instructions, simply returning the RHS if it needed to be evaluated, and returning the short-circuited boolean value if it did not. Example for `$a and $b`: ``` 0: load-variable %0, var 999 "$a" 1: branch-if %0, 3 2: jump 5 3: load-variable %0, var 1000 "$b" # label(0), from(1:) 4: jump 6 5: load-literal %0, bool(false) # label(1), from(2:) 6: span %0 # label(2), from(4:) 7: return %0 ``` Unfortunately, this broke polars, because using `and`/`or` on custom values is perfectly valid and they're allowed to define that behavior differently, and the polars plugin uses this for boolean masks. But without using the `binary-op` instruction, that custom behavior is never invoked. Additionally, `branch-if` requires a boolean, and custom values are not booleans. This changes the IR to the following, using the `match` instruction to check for the specific short-circuit value instead, and still invoking `binary-op` otherwise: ``` 0: load-variable %0, var 125 "$a" 1: match (false), %0, 4 2: load-variable %1, var 124 "$b" 3: binary-op %0, Boolean(And), %1 4: span %0 # label(0), from(1:) 5: return %0 ``` I've also renamed `Pattern::Value` to `Pattern::Expression` and added a proper `Pattern::Value` variant that actually contains a `Value` instead. I'm still hoping to remove `Pattern::Expression` eventually, because it's kind of a hack - we don't actually evaluate the expression, we just match it against a few cases specifically for pattern matching, and it's one of the cases where AST leaks into IR and I want to remove all of those cases, because AST should not leak into IR. Fixes #14518 --- crates/nu-engine/src/compile/builder.rs | 19 +++++ crates/nu-engine/src/compile/keyword.rs | 12 ++- crates/nu-engine/src/compile/operator.rs | 76 +++++++++---------- crates/nu-parser/src/flatten.rs | 4 +- crates/nu-parser/src/parse_patterns.rs | 2 +- crates/nu-parser/src/parser.rs | 6 +- crates/nu-protocol/src/ast/match_pattern.rs | 14 +++- .../nu-protocol/src/engine/pattern_match.rs | 3 +- crates/nu-protocol/src/ir/display.rs | 5 +- 9 files changed, 85 insertions(+), 56 deletions(-) diff --git a/crates/nu-engine/src/compile/builder.rs b/crates/nu-engine/src/compile/builder.rs index 0e2e943053..71f41ee210 100644 --- a/crates/nu-engine/src/compile/builder.rs +++ b/crates/nu-engine/src/compile/builder.rs @@ -1,4 +1,5 @@ use nu_protocol::{ + ast::Pattern, ir::{DataSlice, Instruction, IrAstRef, IrBlock, Literal}, CompileError, IntoSpanned, RegId, Span, Spanned, }; @@ -426,6 +427,24 @@ impl BlockBuilder { self.push(Instruction::Jump { index: label_id.0 }.into_spanned(span)) } + /// Add a `match` instruction + pub(crate) fn r#match( + &mut self, + pattern: Pattern, + src: RegId, + label_id: LabelId, + span: Span, + ) -> Result<(), CompileError> { + self.push( + Instruction::Match { + pattern: Box::new(pattern), + src, + index: label_id.0, + } + .into_spanned(span), + ) + } + /// The index that the next instruction [`.push()`](Self::push)ed will have. pub(crate) fn here(&self) -> usize { self.instructions.len() diff --git a/crates/nu-engine/src/compile/keyword.rs b/crates/nu-engine/src/compile/keyword.rs index b7f378f592..b22e055a21 100644 --- a/crates/nu-engine/src/compile/keyword.rs +++ b/crates/nu-engine/src/compile/keyword.rs @@ -197,13 +197,11 @@ pub(crate) fn compile_match( for (pattern, _) in match_block { let match_label = builder.label(None); match_labels.push(match_label); - builder.push( - Instruction::Match { - pattern: Box::new(pattern.pattern.clone()), - src: match_reg, - index: match_label.0, - } - .into_spanned(pattern.span), + builder.r#match( + pattern.pattern.clone(), + match_reg, + match_label, + pattern.span, )?; // Also add a label for the next match instruction or failure case next_labels.push(builder.label(Some(builder.here()))); diff --git a/crates/nu-engine/src/compile/operator.rs b/crates/nu-engine/src/compile/operator.rs index 0f7059e027..735beaa10c 100644 --- a/crates/nu-engine/src/compile/operator.rs +++ b/crates/nu-engine/src/compile/operator.rs @@ -1,8 +1,8 @@ use nu_protocol::{ - ast::{Assignment, Boolean, CellPath, Expr, Expression, Math, Operator, PathMember}, + ast::{Assignment, Boolean, CellPath, Expr, Expression, Math, Operator, PathMember, Pattern}, engine::StateWorkingSet, ir::{Instruction, Literal}, - IntoSpanned, RegId, Span, Spanned, ENV_VARIABLE_ID, + IntoSpanned, RegId, Span, Spanned, Value, ENV_VARIABLE_ID, }; use nu_utils::IgnoreCaseExt; @@ -59,56 +59,52 @@ pub(crate) fn compile_binary_op( )?; match op.item { - // `and` / `or` are short-circuiting, and we can get by with one register and a branch - Operator::Boolean(Boolean::And) => { - let true_label = builder.label(None); - builder.branch_if(lhs_reg, true_label, op.span)?; + // `and` / `or` are short-circuiting, use `match` to avoid running the RHS if LHS is + // the correct value. Be careful to support and/or on non-boolean values + Operator::Boolean(bool_op @ Boolean::And) + | Operator::Boolean(bool_op @ Boolean::Or) => { + // `and` short-circuits on false, and `or` short-circuits on true. + let short_circuit_value = match bool_op { + Boolean::And => false, + Boolean::Or => true, + Boolean::Xor => unreachable!(), + }; - // If the branch was not taken it's false, so short circuit to load false - let false_label = builder.label(None); - builder.jump(false_label, op.span)?; + // Short-circuit to return `lhs_reg`. `match` op does not consume `lhs_reg`. + let short_circuit_label = builder.label(None); + builder.r#match( + Pattern::Value(Value::bool(short_circuit_value, op.span)), + lhs_reg, + short_circuit_label, + op.span, + )?; - builder.set_label(true_label, builder.here())?; + // If the match failed then this was not the short-circuit value, so we have to run + // the RHS expression + let rhs_reg = builder.next_register()?; compile_expression( working_set, builder, rhs, RedirectModes::value(rhs.span), None, - lhs_reg, + rhs_reg, )?; - let end_label = builder.label(None); - builder.jump(end_label, op.span)?; - - // Consumed by `branch-if`, so we have to set it false again - builder.set_label(false_label, builder.here())?; - builder.load_literal(lhs_reg, Literal::Bool(false).into_spanned(lhs.span))?; - - builder.set_label(end_label, builder.here())?; - } - Operator::Boolean(Boolean::Or) => { - let true_label = builder.label(None); - builder.branch_if(lhs_reg, true_label, op.span)?; - - // If the branch was not taken it's false, so do the right-side expression - compile_expression( - working_set, - builder, - rhs, - RedirectModes::value(rhs.span), - None, - lhs_reg, + // It may seem intuitive that we can just return RHS here, but we do have to + // actually execute the binary-op in case this is not a boolean + builder.push( + Instruction::BinaryOp { + lhs_dst: lhs_reg, + op: Operator::Boolean(bool_op), + rhs: rhs_reg, + } + .into_spanned(op.span), )?; - let end_label = builder.label(None); - builder.jump(end_label, op.span)?; - - // Consumed by `branch-if`, so we have to set it true again - builder.set_label(true_label, builder.here())?; - builder.load_literal(lhs_reg, Literal::Bool(true).into_spanned(lhs.span))?; - - builder.set_label(end_label, builder.here())?; + // In either the short-circuit case or other case, the result is in lhs_reg = + // out_reg + builder.set_label(short_circuit_label, builder.here())?; } _ => { // Any other operator, via `binary-op` diff --git a/crates/nu-parser/src/flatten.rs b/crates/nu-parser/src/flatten.rs index 318ae00ffd..731ab52984 100644 --- a/crates/nu-parser/src/flatten.rs +++ b/crates/nu-parser/src/flatten.rs @@ -644,7 +644,9 @@ fn flatten_pattern_into(match_pattern: &MatchPattern, output: &mut Vec<(Span, Fl output.push((match_pattern.span, FlatShape::MatchPattern)); } } - Pattern::Value(_) => output.push((match_pattern.span, FlatShape::MatchPattern)), + Pattern::Expression(_) | Pattern::Value(_) => { + output.push((match_pattern.span, FlatShape::MatchPattern)) + } Pattern::Variable(var_id) => output.push((match_pattern.span, FlatShape::VarDecl(*var_id))), Pattern::Rest(var_id) => output.push((match_pattern.span, FlatShape::VarDecl(*var_id))), Pattern::Or(patterns) => { diff --git a/crates/nu-parser/src/parse_patterns.rs b/crates/nu-parser/src/parse_patterns.rs index 6f7cb62bc2..05193c6696 100644 --- a/crates/nu-parser/src/parse_patterns.rs +++ b/crates/nu-parser/src/parse_patterns.rs @@ -40,7 +40,7 @@ pub fn parse_pattern(working_set: &mut StateWorkingSet, span: Span) -> MatchPatt let value = parse_value(working_set, span, &SyntaxShape::Any); MatchPattern { - pattern: Pattern::Value(Box::new(value)), + pattern: Pattern::Expression(Box::new(value)), guard: None, span, } diff --git a/crates/nu-parser/src/parser.rs b/crates/nu-parser/src/parser.rs index d71ef77182..277ce83e7f 100644 --- a/crates/nu-parser/src/parser.rs +++ b/crates/nu-parser/src/parser.rs @@ -6286,7 +6286,11 @@ pub fn discover_captures_in_pattern(pattern: &MatchPattern, seen: &mut Vec seen.push(*var_id), - Pattern::Value(_) | Pattern::IgnoreValue | Pattern::IgnoreRest | Pattern::Garbage => {} + Pattern::Expression(_) + | Pattern::Value(_) + | Pattern::IgnoreValue + | Pattern::IgnoreRest + | Pattern::Garbage => {} } } diff --git a/crates/nu-protocol/src/ast/match_pattern.rs b/crates/nu-protocol/src/ast/match_pattern.rs index ca639270aa..3bceb7c329 100644 --- a/crates/nu-protocol/src/ast/match_pattern.rs +++ b/crates/nu-protocol/src/ast/match_pattern.rs @@ -1,5 +1,5 @@ use super::Expression; -use crate::{Span, VarId}; +use crate::{Span, Value, VarId}; use serde::{Deserialize, Serialize}; /// AST Node for match arm with optional match guard @@ -23,10 +23,12 @@ pub enum Pattern { Record(Vec<(String, MatchPattern)>), /// List destructuring List(Vec), - /// Matching against a literal + /// Matching against a literal (from expression result) // TODO: it would be nice if this didn't depend on AST // maybe const evaluation can get us to a Value instead? - Value(Box), + Expression(Box), + /// Matching against a literal (pure value) + Value(Value), /// binding to a variable Variable(VarId), /// the `pattern1 \ pattern2` or-pattern @@ -62,7 +64,11 @@ impl Pattern { } } Pattern::Rest(var_id) => output.push(*var_id), - Pattern::Value(_) | Pattern::IgnoreValue | Pattern::Garbage | Pattern::IgnoreRest => {} + Pattern::Expression(_) + | Pattern::Value(_) + | Pattern::IgnoreValue + | Pattern::Garbage + | Pattern::IgnoreRest => {} } output diff --git a/crates/nu-protocol/src/engine/pattern_match.rs b/crates/nu-protocol/src/engine/pattern_match.rs index 32c34d22d3..bac2fd8d17 100644 --- a/crates/nu-protocol/src/engine/pattern_match.rs +++ b/crates/nu-protocol/src/engine/pattern_match.rs @@ -94,7 +94,7 @@ impl Matcher for Pattern { } _ => false, }, - Pattern::Value(pattern_value) => { + Pattern::Expression(pattern_value) => { // TODO: Fill this out with the rest of them match &pattern_value.expr { Expr::Nothing => { @@ -205,6 +205,7 @@ impl Matcher for Pattern { _ => false, } } + Pattern::Value(pattern_value) => value == pattern_value, Pattern::Or(patterns) => { let mut result = false; diff --git a/crates/nu-protocol/src/ir/display.rs b/crates/nu-protocol/src/ir/display.rs index bc275f0bc5..98ea68db9d 100644 --- a/crates/nu-protocol/src/ir/display.rs +++ b/crates/nu-protocol/src/ir/display.rs @@ -419,11 +419,14 @@ impl<'a> fmt::Display for FmtPattern<'a> { } f.write_str("]") } - Pattern::Value(expr) => { + Pattern::Expression(expr) => { let string = String::from_utf8_lossy(self.engine_state.get_span_contents(expr.span)); f.write_str(&string) } + Pattern::Value(value) => { + f.write_str(&value.to_parsable_string(", ", &self.engine_state.config)) + } Pattern::Variable(var_id) => { let variable = FmtVar::new(self.engine_state, *var_id); write!(f, "{}", variable)