mirror of
https://github.com/nushell/nushell
synced 2025-01-13 21:55:07 +00:00
Change how and
and or
operations are compiled to IR to support custom values (#14653)
# Description Because `and` and `or` are short-circuiting operations in Nushell, they must be compiled to a sequence that avoids evaluating the RHS if the LHS is already sufficient to determine the output - i.e., `false` for `and` and `true` for `or`. I initially implemented this with `branch-if` instructions, simply returning the RHS if it needed to be evaluated, and returning the short-circuited boolean value if it did not. Example for `$a and $b`: ``` 0: load-variable %0, var 999 "$a" 1: branch-if %0, 3 2: jump 5 3: load-variable %0, var 1000 "$b" # label(0), from(1:) 4: jump 6 5: load-literal %0, bool(false) # label(1), from(2:) 6: span %0 # label(2), from(4:) 7: return %0 ``` Unfortunately, this broke polars, because using `and`/`or` on custom values is perfectly valid and they're allowed to define that behavior differently, and the polars plugin uses this for boolean masks. But without using the `binary-op` instruction, that custom behavior is never invoked. Additionally, `branch-if` requires a boolean, and custom values are not booleans. This changes the IR to the following, using the `match` instruction to check for the specific short-circuit value instead, and still invoking `binary-op` otherwise: ``` 0: load-variable %0, var 125 "$a" 1: match (false), %0, 4 2: load-variable %1, var 124 "$b" 3: binary-op %0, Boolean(And), %1 4: span %0 # label(0), from(1:) 5: return %0 ``` I've also renamed `Pattern::Value` to `Pattern::Expression` and added a proper `Pattern::Value` variant that actually contains a `Value` instead. I'm still hoping to remove `Pattern::Expression` eventually, because it's kind of a hack - we don't actually evaluate the expression, we just match it against a few cases specifically for pattern matching, and it's one of the cases where AST leaks into IR and I want to remove all of those cases, because AST should not leak into IR. Fixes #14518 # User-Facing Changes - `and` and `or` now support custom values again. - the IR is actually a little bit cleaner, though it may be a bit slower; `match` is more complex. # Tests + Formatting The existing tests pass, but I didn't add anything new. Unfortunately I don't think there's anything built-in to trigger this, but maybe some testcases could be added to polars to test it.
This commit is contained in:
parent
4b1f4e63c3
commit
35d2750757
9 changed files with 85 additions and 56 deletions
|
@ -1,4 +1,5 @@
|
||||||
use nu_protocol::{
|
use nu_protocol::{
|
||||||
|
ast::Pattern,
|
||||||
ir::{DataSlice, Instruction, IrAstRef, IrBlock, Literal},
|
ir::{DataSlice, Instruction, IrAstRef, IrBlock, Literal},
|
||||||
CompileError, IntoSpanned, RegId, Span, Spanned,
|
CompileError, IntoSpanned, RegId, Span, Spanned,
|
||||||
};
|
};
|
||||||
|
@ -426,6 +427,24 @@ impl BlockBuilder {
|
||||||
self.push(Instruction::Jump { index: label_id.0 }.into_spanned(span))
|
self.push(Instruction::Jump { index: label_id.0 }.into_spanned(span))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add a `match` instruction
|
||||||
|
pub(crate) fn r#match(
|
||||||
|
&mut self,
|
||||||
|
pattern: Pattern,
|
||||||
|
src: RegId,
|
||||||
|
label_id: LabelId,
|
||||||
|
span: Span,
|
||||||
|
) -> Result<(), CompileError> {
|
||||||
|
self.push(
|
||||||
|
Instruction::Match {
|
||||||
|
pattern: Box::new(pattern),
|
||||||
|
src,
|
||||||
|
index: label_id.0,
|
||||||
|
}
|
||||||
|
.into_spanned(span),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/// The index that the next instruction [`.push()`](Self::push)ed will have.
|
/// The index that the next instruction [`.push()`](Self::push)ed will have.
|
||||||
pub(crate) fn here(&self) -> usize {
|
pub(crate) fn here(&self) -> usize {
|
||||||
self.instructions.len()
|
self.instructions.len()
|
||||||
|
|
|
@ -197,13 +197,11 @@ pub(crate) fn compile_match(
|
||||||
for (pattern, _) in match_block {
|
for (pattern, _) in match_block {
|
||||||
let match_label = builder.label(None);
|
let match_label = builder.label(None);
|
||||||
match_labels.push(match_label);
|
match_labels.push(match_label);
|
||||||
builder.push(
|
builder.r#match(
|
||||||
Instruction::Match {
|
pattern.pattern.clone(),
|
||||||
pattern: Box::new(pattern.pattern.clone()),
|
match_reg,
|
||||||
src: match_reg,
|
match_label,
|
||||||
index: match_label.0,
|
pattern.span,
|
||||||
}
|
|
||||||
.into_spanned(pattern.span),
|
|
||||||
)?;
|
)?;
|
||||||
// Also add a label for the next match instruction or failure case
|
// Also add a label for the next match instruction or failure case
|
||||||
next_labels.push(builder.label(Some(builder.here())));
|
next_labels.push(builder.label(Some(builder.here())));
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use nu_protocol::{
|
use nu_protocol::{
|
||||||
ast::{Assignment, Boolean, CellPath, Expr, Expression, Math, Operator, PathMember},
|
ast::{Assignment, Boolean, CellPath, Expr, Expression, Math, Operator, PathMember, Pattern},
|
||||||
engine::StateWorkingSet,
|
engine::StateWorkingSet,
|
||||||
ir::{Instruction, Literal},
|
ir::{Instruction, Literal},
|
||||||
IntoSpanned, RegId, Span, Spanned, ENV_VARIABLE_ID,
|
IntoSpanned, RegId, Span, Spanned, Value, ENV_VARIABLE_ID,
|
||||||
};
|
};
|
||||||
use nu_utils::IgnoreCaseExt;
|
use nu_utils::IgnoreCaseExt;
|
||||||
|
|
||||||
|
@ -59,56 +59,52 @@ pub(crate) fn compile_binary_op(
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
match op.item {
|
match op.item {
|
||||||
// `and` / `or` are short-circuiting, and we can get by with one register and a branch
|
// `and` / `or` are short-circuiting, use `match` to avoid running the RHS if LHS is
|
||||||
Operator::Boolean(Boolean::And) => {
|
// the correct value. Be careful to support and/or on non-boolean values
|
||||||
let true_label = builder.label(None);
|
Operator::Boolean(bool_op @ Boolean::And)
|
||||||
builder.branch_if(lhs_reg, true_label, op.span)?;
|
| Operator::Boolean(bool_op @ Boolean::Or) => {
|
||||||
|
// `and` short-circuits on false, and `or` short-circuits on true.
|
||||||
|
let short_circuit_value = match bool_op {
|
||||||
|
Boolean::And => false,
|
||||||
|
Boolean::Or => true,
|
||||||
|
Boolean::Xor => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
// If the branch was not taken it's false, so short circuit to load false
|
// Short-circuit to return `lhs_reg`. `match` op does not consume `lhs_reg`.
|
||||||
let false_label = builder.label(None);
|
let short_circuit_label = builder.label(None);
|
||||||
builder.jump(false_label, op.span)?;
|
builder.r#match(
|
||||||
|
Pattern::Value(Value::bool(short_circuit_value, op.span)),
|
||||||
|
lhs_reg,
|
||||||
|
short_circuit_label,
|
||||||
|
op.span,
|
||||||
|
)?;
|
||||||
|
|
||||||
builder.set_label(true_label, builder.here())?;
|
// If the match failed then this was not the short-circuit value, so we have to run
|
||||||
|
// the RHS expression
|
||||||
|
let rhs_reg = builder.next_register()?;
|
||||||
compile_expression(
|
compile_expression(
|
||||||
working_set,
|
working_set,
|
||||||
builder,
|
builder,
|
||||||
rhs,
|
rhs,
|
||||||
RedirectModes::value(rhs.span),
|
RedirectModes::value(rhs.span),
|
||||||
None,
|
None,
|
||||||
lhs_reg,
|
rhs_reg,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let end_label = builder.label(None);
|
// It may seem intuitive that we can just return RHS here, but we do have to
|
||||||
builder.jump(end_label, op.span)?;
|
// actually execute the binary-op in case this is not a boolean
|
||||||
|
builder.push(
|
||||||
// Consumed by `branch-if`, so we have to set it false again
|
Instruction::BinaryOp {
|
||||||
builder.set_label(false_label, builder.here())?;
|
lhs_dst: lhs_reg,
|
||||||
builder.load_literal(lhs_reg, Literal::Bool(false).into_spanned(lhs.span))?;
|
op: Operator::Boolean(bool_op),
|
||||||
|
rhs: rhs_reg,
|
||||||
builder.set_label(end_label, builder.here())?;
|
}
|
||||||
}
|
.into_spanned(op.span),
|
||||||
Operator::Boolean(Boolean::Or) => {
|
|
||||||
let true_label = builder.label(None);
|
|
||||||
builder.branch_if(lhs_reg, true_label, op.span)?;
|
|
||||||
|
|
||||||
// If the branch was not taken it's false, so do the right-side expression
|
|
||||||
compile_expression(
|
|
||||||
working_set,
|
|
||||||
builder,
|
|
||||||
rhs,
|
|
||||||
RedirectModes::value(rhs.span),
|
|
||||||
None,
|
|
||||||
lhs_reg,
|
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let end_label = builder.label(None);
|
// In either the short-circuit case or other case, the result is in lhs_reg =
|
||||||
builder.jump(end_label, op.span)?;
|
// out_reg
|
||||||
|
builder.set_label(short_circuit_label, builder.here())?;
|
||||||
// Consumed by `branch-if`, so we have to set it true again
|
|
||||||
builder.set_label(true_label, builder.here())?;
|
|
||||||
builder.load_literal(lhs_reg, Literal::Bool(true).into_spanned(lhs.span))?;
|
|
||||||
|
|
||||||
builder.set_label(end_label, builder.here())?;
|
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
// Any other operator, via `binary-op`
|
// Any other operator, via `binary-op`
|
||||||
|
|
|
@ -644,7 +644,9 @@ fn flatten_pattern_into(match_pattern: &MatchPattern, output: &mut Vec<(Span, Fl
|
||||||
output.push((match_pattern.span, FlatShape::MatchPattern));
|
output.push((match_pattern.span, FlatShape::MatchPattern));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Pattern::Value(_) => output.push((match_pattern.span, FlatShape::MatchPattern)),
|
Pattern::Expression(_) | Pattern::Value(_) => {
|
||||||
|
output.push((match_pattern.span, FlatShape::MatchPattern))
|
||||||
|
}
|
||||||
Pattern::Variable(var_id) => output.push((match_pattern.span, FlatShape::VarDecl(*var_id))),
|
Pattern::Variable(var_id) => output.push((match_pattern.span, FlatShape::VarDecl(*var_id))),
|
||||||
Pattern::Rest(var_id) => output.push((match_pattern.span, FlatShape::VarDecl(*var_id))),
|
Pattern::Rest(var_id) => output.push((match_pattern.span, FlatShape::VarDecl(*var_id))),
|
||||||
Pattern::Or(patterns) => {
|
Pattern::Or(patterns) => {
|
||||||
|
|
|
@ -40,7 +40,7 @@ pub fn parse_pattern(working_set: &mut StateWorkingSet, span: Span) -> MatchPatt
|
||||||
let value = parse_value(working_set, span, &SyntaxShape::Any);
|
let value = parse_value(working_set, span, &SyntaxShape::Any);
|
||||||
|
|
||||||
MatchPattern {
|
MatchPattern {
|
||||||
pattern: Pattern::Value(Box::new(value)),
|
pattern: Pattern::Expression(Box::new(value)),
|
||||||
guard: None,
|
guard: None,
|
||||||
span,
|
span,
|
||||||
}
|
}
|
||||||
|
|
|
@ -6286,7 +6286,11 @@ pub fn discover_captures_in_pattern(pattern: &MatchPattern, seen: &mut Vec<VarId
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Pattern::Rest(var_id) => seen.push(*var_id),
|
Pattern::Rest(var_id) => seen.push(*var_id),
|
||||||
Pattern::Value(_) | Pattern::IgnoreValue | Pattern::IgnoreRest | Pattern::Garbage => {}
|
Pattern::Expression(_)
|
||||||
|
| Pattern::Value(_)
|
||||||
|
| Pattern::IgnoreValue
|
||||||
|
| Pattern::IgnoreRest
|
||||||
|
| Pattern::Garbage => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use super::Expression;
|
use super::Expression;
|
||||||
use crate::{Span, VarId};
|
use crate::{Span, Value, VarId};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
/// AST Node for match arm with optional match guard
|
/// AST Node for match arm with optional match guard
|
||||||
|
@ -23,10 +23,12 @@ pub enum Pattern {
|
||||||
Record(Vec<(String, MatchPattern)>),
|
Record(Vec<(String, MatchPattern)>),
|
||||||
/// List destructuring
|
/// List destructuring
|
||||||
List(Vec<MatchPattern>),
|
List(Vec<MatchPattern>),
|
||||||
/// Matching against a literal
|
/// Matching against a literal (from expression result)
|
||||||
// TODO: it would be nice if this didn't depend on AST
|
// TODO: it would be nice if this didn't depend on AST
|
||||||
// maybe const evaluation can get us to a Value instead?
|
// maybe const evaluation can get us to a Value instead?
|
||||||
Value(Box<Expression>),
|
Expression(Box<Expression>),
|
||||||
|
/// Matching against a literal (pure value)
|
||||||
|
Value(Value),
|
||||||
/// binding to a variable
|
/// binding to a variable
|
||||||
Variable(VarId),
|
Variable(VarId),
|
||||||
/// the `pattern1 \ pattern2` or-pattern
|
/// the `pattern1 \ pattern2` or-pattern
|
||||||
|
@ -62,7 +64,11 @@ impl Pattern {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Pattern::Rest(var_id) => output.push(*var_id),
|
Pattern::Rest(var_id) => output.push(*var_id),
|
||||||
Pattern::Value(_) | Pattern::IgnoreValue | Pattern::Garbage | Pattern::IgnoreRest => {}
|
Pattern::Expression(_)
|
||||||
|
| Pattern::Value(_)
|
||||||
|
| Pattern::IgnoreValue
|
||||||
|
| Pattern::Garbage
|
||||||
|
| Pattern::IgnoreRest => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
output
|
output
|
||||||
|
|
|
@ -94,7 +94,7 @@ impl Matcher for Pattern {
|
||||||
}
|
}
|
||||||
_ => false,
|
_ => false,
|
||||||
},
|
},
|
||||||
Pattern::Value(pattern_value) => {
|
Pattern::Expression(pattern_value) => {
|
||||||
// TODO: Fill this out with the rest of them
|
// TODO: Fill this out with the rest of them
|
||||||
match &pattern_value.expr {
|
match &pattern_value.expr {
|
||||||
Expr::Nothing => {
|
Expr::Nothing => {
|
||||||
|
@ -205,6 +205,7 @@ impl Matcher for Pattern {
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Pattern::Value(pattern_value) => value == pattern_value,
|
||||||
Pattern::Or(patterns) => {
|
Pattern::Or(patterns) => {
|
||||||
let mut result = false;
|
let mut result = false;
|
||||||
|
|
||||||
|
|
|
@ -419,11 +419,14 @@ impl<'a> fmt::Display for FmtPattern<'a> {
|
||||||
}
|
}
|
||||||
f.write_str("]")
|
f.write_str("]")
|
||||||
}
|
}
|
||||||
Pattern::Value(expr) => {
|
Pattern::Expression(expr) => {
|
||||||
let string =
|
let string =
|
||||||
String::from_utf8_lossy(self.engine_state.get_span_contents(expr.span));
|
String::from_utf8_lossy(self.engine_state.get_span_contents(expr.span));
|
||||||
f.write_str(&string)
|
f.write_str(&string)
|
||||||
}
|
}
|
||||||
|
Pattern::Value(value) => {
|
||||||
|
f.write_str(&value.to_parsable_string(", ", &self.engine_state.config))
|
||||||
|
}
|
||||||
Pattern::Variable(var_id) => {
|
Pattern::Variable(var_id) => {
|
||||||
let variable = FmtVar::new(self.engine_state, *var_id);
|
let variable = FmtVar::new(self.engine_state, *var_id);
|
||||||
write!(f, "{}", variable)
|
write!(f, "{}", variable)
|
||||||
|
|
Loading…
Reference in a new issue