fish-shell/fish-rust/src/tinyexpr.rs
Fabian Boehm 03402e572d format
2023-08-25 16:28:41 +02:00

720 lines
22 KiB
Rust

/*
* TINYEXPR - Tiny recursive descent parser and evaluation engine in C
*
* Copyright (c) 2015, 2016 Lewis Van Winkle
*
* http://CodePlea.com
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgement in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
// This version has been altered and ported to C++, then to Rust, for inclusion in fish.
use std::{
f64::{
consts::{E, PI, TAU},
INFINITY, NAN, NEG_INFINITY,
},
fmt::Debug,
ops::{BitAnd, BitOr, BitXor},
};
use crate::{
wchar::prelude::*,
wutil::{wcstod::wcstod_underscores, wgettext, Error as wcstodError},
};
#[derive(Clone, Copy)]
enum Function {
Constant(f64),
Fn0(fn() -> f64),
Fn1(fn(f64) -> f64),
Fn2(fn(f64, f64) -> f64),
FnN(fn(&[f64]) -> f64),
}
impl Debug for Function {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let variant = match self {
Function::Constant(n) => return f.debug_tuple("Function::Constant").field(n).finish(),
Function::Fn0(_) => "Fn0",
Function::Fn1(_) => "Fn1",
Function::Fn2(_) => "Fn2",
Function::FnN(_) => "FnN",
};
write!(f, "Function::{variant}(_)")
}
}
impl Function {
pub fn arity(&self) -> Option<usize> {
match self {
Function::Constant(_) => Some(0),
Function::Fn0(_) => Some(0),
Function::Fn1(_) => Some(1),
Function::Fn2(_) => Some(2),
Function::FnN(_) => None,
}
}
pub fn call(&self, args: &[f64]) -> f64 {
match (self, args) {
(Function::Constant(n), []) => *n,
(Function::Fn0(f), []) => f(),
(Function::Fn1(f), [a]) => f(*a),
(Function::Fn2(f), [a, b]) => f(*a, *b),
(Function::FnN(f), args) => f(args),
(_, _) => panic!("Incorrect number of arguments for function call"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorKind {
UnknownFunction,
MissingClosingParen,
MissingOpenParen,
TooFewArgs,
TooManyArgs,
MissingOperator,
UnexpectedToken,
LogicalOperator,
DivByZero,
NumberTooLarge,
Unknown,
}
#[widestrs]
impl ErrorKind {
pub fn describe_wstr(&self) -> &'static wstr {
match self {
ErrorKind::UnknownFunction => wgettext!("Unknown function"),
ErrorKind::MissingClosingParen => wgettext!("Missing closing parenthesis"),
ErrorKind::MissingOpenParen => wgettext!("Missing opening parenthesis"),
ErrorKind::TooFewArgs => wgettext!("Too few arguments"),
ErrorKind::TooManyArgs => wgettext!("Too many arguments"),
ErrorKind::MissingOperator => wgettext!("Missing operator"),
ErrorKind::UnexpectedToken => wgettext!("Unexpected token"),
ErrorKind::LogicalOperator => {
wgettext!("Logical operations are not supported, use `test` instead")
}
ErrorKind::DivByZero => wgettext!("Division by zero"),
ErrorKind::NumberTooLarge => wgettext!("Number is too large"),
ErrorKind::Unknown => wgettext!("Expression is bogus"),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Error {
pub kind: ErrorKind,
pub position: usize,
pub len: usize,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Operator {
Add,
Sub,
Mul,
Div,
Pow,
Rem,
}
impl Operator {
pub fn eval(&self, a: f64, b: f64) -> f64 {
match self {
Operator::Add => a + b,
Operator::Sub => a - b,
Operator::Mul => a * b,
Operator::Div => a / b,
Operator::Pow => a.powf(b),
Operator::Rem => a % b,
}
}
}
#[derive(Debug, Clone, Copy)]
enum Token {
Null,
Error,
End,
Sep,
Open,
Close,
Number(f64),
Function(Function),
Infix(Operator),
}
struct State<'s> {
start: &'s wstr,
pos: usize,
current: Token,
error: Option<Error>,
}
fn bitwise_op(a: f64, b: f64, f: fn(u64, u64) -> u64) -> f64 {
// TODO: bounds checks
let a = a as u64;
let b = b as u64;
let result = f(a, b);
// TODO: bounds checks
result as f64
}
fn fac(n: f64) -> f64 {
if n < 0.0 {
return NAN;
}
if n > (u64::MAX as f64) {
return INFINITY;
}
let n = n as u64;
(1..=n)
.try_fold(1_u64, |acc, i| acc.checked_mul(i))
.map_or(INFINITY, |x| x as f64)
}
fn maximum(n: &[f64]) -> f64 {
n.iter().fold(NEG_INFINITY, |a, &b| {
if a.is_nan() {
return a;
}
if b.is_nan() {
return b;
}
if a == b {
// treat +0 as larger than -0
if a.is_sign_positive() {
a
} else {
b
}
} else if a > b {
a
} else {
b
}
})
}
fn minimum(n: &[f64]) -> f64 {
n.iter().fold(INFINITY, |a, &b| {
if a.is_nan() {
return a;
}
if b.is_nan() {
return b;
}
if a == b {
// treat -0 as smaller than +0
if a.is_sign_negative() {
a
} else {
b
}
} else if a < b {
a
} else {
b
}
})
}
fn ncr(n: f64, r: f64) -> f64 {
// Doing this for NAN takes ages - just return the result right away.
if n.is_nan() {
return INFINITY;
}
if n < 0.0 || r < 0.0 || n < r {
return NAN;
}
if n > (u64::MAX as f64) || r > (u64::MAX as f64) {
return INFINITY;
}
let un = n as u64;
let mut ur = r as u64;
if ur > un / 2 {
ur = un - ur
};
let mut result = 1_u64;
for i in 1..=ur {
let Some(next_result) = result.checked_mul(un - ur + i) else {
return INFINITY;
};
result = next_result / i;
}
result as f64
}
fn npr(n: f64, r: f64) -> f64 {
ncr(n, r) * fac(r)
}
#[widestrs]
const BUILTINS: &[(&wstr, Function)] = &[
// must be in alphabetical order
("abs"L, Function::Fn1(f64::abs)),
("acos"L, Function::Fn1(f64::acos)),
("asin"L, Function::Fn1(f64::asin)),
("atan"L, Function::Fn1(f64::atan)),
("atan2"L, Function::Fn2(f64::atan2)),
(
"bitand"L,
Function::Fn2(|a, b| bitwise_op(a, b, BitAnd::bitand)),
),
(
"bitor"L,
Function::Fn2(|a, b| bitwise_op(a, b, BitOr::bitor)),
),
(
"bitxor"L,
Function::Fn2(|a, b| bitwise_op(a, b, BitXor::bitxor)),
),
("ceil"L, Function::Fn1(f64::ceil)),
("cos"L, Function::Fn1(f64::cos)),
("cosh"L, Function::Fn1(f64::cosh)),
("e"L, Function::Constant(E)),
("exp"L, Function::Fn1(f64::exp)),
("fac"L, Function::Fn1(fac)),
("floor"L, Function::Fn1(f64::floor)),
("ln"L, Function::Fn1(f64::ln)),
("log"L, Function::Fn1(f64::log10)),
("log10"L, Function::Fn1(f64::log10)),
("log2"L, Function::Fn1(f64::log2)),
("max"L, Function::FnN(maximum)),
("min"L, Function::FnN(minimum)),
("ncr"L, Function::Fn2(ncr)),
("npr"L, Function::Fn2(npr)),
("pi"L, Function::Constant(PI)),
("pow"L, Function::Fn2(f64::powf)),
("round"L, Function::Fn1(f64::round)),
("sin"L, Function::Fn1(f64::sin)),
("sinh"L, Function::Fn1(f64::sinh)),
("sqrt"L, Function::Fn1(f64::sqrt)),
("tan"L, Function::Fn1(f64::tan)),
("tanh"L, Function::Fn1(f64::tanh)),
("tau"L, Function::Constant(TAU)),
];
assert_sorted_by_name!(BUILTINS, 0);
fn find_builtin(name: &wstr) -> Option<Function> {
let idx = BUILTINS
.binary_search_by_key(&name, |(name, _expr)| name)
.ok()?;
Some(BUILTINS[idx].1)
}
impl<'s> State<'s> {
pub fn new(input: &'s wstr) -> Self {
let mut state = Self {
start: input,
pos: 0,
current: Token::End,
error: None,
};
state.next_token();
state
}
pub fn error(&self) -> Result<(), Error> {
if let Token::End = self.current {
Ok(())
} else if let Some(error) = self.error {
Err(error)
} else {
// If we're not at the end but there's no error, then that means we have a
// superfluous token that we have no idea what to do with.
Err(Error {
kind: ErrorKind::TooManyArgs,
position: self.pos,
len: 0,
})
}
}
pub fn eval(&mut self) -> f64 {
return self.expr();
}
fn set_error(&mut self, kind: ErrorKind, pos_len: Option<(usize, usize)>) {
self.current = Token::Error;
let (position, len) = pos_len.unwrap_or((self.pos, 0));
self.error = Some(Error {
kind,
position,
len,
});
}
fn no_specific_error(&self) -> bool {
!matches!(self.current, Token::Error)
|| matches!(
self.error,
Some(Error {
kind: ErrorKind::Unknown,
..
})
)
}
/// Tries to get the next token from the input. If the input does not contain enough data for
/// another token, `None` is returned. Otherwise, the number of consumed characters is returned
/// along with either the token, or `None` in case of ignored (whitespace) input.
fn get_token(&mut self) -> Option<(usize, Option<Token>)> {
debug_assert!(!matches!(self.current, Token::Error));
let next = &self.start.as_char_slice().get(self.pos..)?;
// Try reading a number.
if matches!(next.first(), Some('0'..='9') | Some('.')) {
let mut consumed = 0;
match wcstod_underscores(*next, &mut consumed) {
Ok(num) => Some((consumed, Some(Token::Number(num)))),
Err(wcstodError::InvalidChar) => {
self.set_error(ErrorKind::Unknown, Some((self.pos + consumed, 1)));
return Some((consumed, Some(Token::Error)));
}
Err(wcstodError::Overflow) => {
self.set_error(ErrorKind::NumberTooLarge, Some((self.pos, consumed)));
return Some((consumed, Some(Token::Error)));
}
Err(wcstodError::Empty) => {
// We have a matches! above, this can't be?
unreachable!()
}
}
} else {
// Look for a function call.
// But not when it's an "x" followed by whitespace
// - that's the alternative multiplication operator.
if next.first()?.is_ascii_lowercase()
&& !(*next.first()? == 'x' && next.len() > 1 && next[1].is_whitespace())
{
let ident_len = next
.iter()
.position(|&c| !(c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'))
.unwrap_or(next.len());
let ident = &next[..ident_len];
if let Some(var) = find_builtin(wstr::from_char_slice(ident)) {
return Some((ident_len, Some(Token::Function(var))));
} else if self.no_specific_error() {
// Our error is more specific, so it takes precedence.
self.set_error(ErrorKind::UnknownFunction, Some((self.pos, ident_len)));
}
Some((ident_len, Some(Token::Error)))
} else {
// Look for an operator or special character.
let tok = match next.first()? {
'+' => Token::Infix(Operator::Add),
'-' => Token::Infix(Operator::Sub),
'x' | '*' => Token::Infix(Operator::Mul),
'/' => Token::Infix(Operator::Div),
'^' => Token::Infix(Operator::Pow),
'%' => Token::Infix(Operator::Rem),
'(' => Token::Open,
')' => Token::Close,
',' => Token::Sep,
' ' | '\t' | '\n' | '\r' => return Some((1, None)),
'=' | '>' | '<' | '&' | '|' | '!' => {
self.set_error(ErrorKind::LogicalOperator, None);
Token::Error
}
_ => {
self.set_error(ErrorKind::MissingOperator, None);
Token::Error
}
};
Some((1, Some(tok)))
}
}
}
fn next_token(&mut self) {
self.current = loop {
let Some((consumed, token)) = self.get_token() else {
break Token::End;
};
self.pos += consumed;
if let Some(token) = token {
break token;
}
};
}
/// ```
/// <base> = <constant> |
/// <function-0> {"(" ")"} |
/// <function-1> <power> |
/// <function-X> "(" <expr> {"," <expr>} ")" |
/// "(" <list> ")"
/// ```
fn base(&mut self) -> f64 {
match self.current {
Token::Number(n) => {
let after_first = self.pos;
self.next_token();
if let Token::Number(_) | Token::Function(_) = self.current {
// Two numbers after each other:
// math '5 2'
// math '3 pi'
// (of course 3 pi could also be interpreted as 3 x pi)
// The error should be given *between*
// the last two tokens.
let num_whitespace = self.start[after_first..]
.chars()
.take_while(|&c| " \t\n\r".contains(c))
.count();
self.set_error(
ErrorKind::MissingOperator,
Some((after_first, num_whitespace)),
);
}
n
}
Token::Function(f) => {
self.next_token();
let have_open = matches!(self.current, Token::Open);
if have_open {
// If we *have* an opening parenthesis,
// we need to consume it and
// expect a closing one.
self.next_token();
}
if f.arity() == Some(0) {
if have_open {
if let Token::Close = self.current {
self.next_token();
} else if self.no_specific_error() {
self.set_error(ErrorKind::MissingClosingParen, None);
}
}
return match f {
Function::Fn0(f) => f(),
Function::Constant(n) => n,
_ => unreachable!("unhandled function type with arity 0"),
};
}
let mut parameters = vec![];
let mut i = 0;
let mut first_err = None;
for j in 0.. {
if f.arity() == Some(j) {
first_err = Some(self.pos - 1);
}
parameters.push(self.expr());
if !matches!(self.current, Token::Sep) {
break;
}
self.next_token();
i += 1;
}
if f.arity().is_none() || f.arity() == Some(i + 1) {
if !have_open {
return f.call(&parameters);
}
if let Token::Close = self.current {
// We have an opening and a closing paren, consume the closing one and done.
self.next_token();
return f.call(&parameters);
}
if !matches!(self.current, Token::Error) {
// If we had the right number of arguments, we're missing a closing paren.
self.set_error(ErrorKind::MissingClosingParen, None);
}
}
if !matches!(self.current, Token::Error)
|| matches!(
self.error,
Some(Error {
kind: ErrorKind::UnexpectedToken,
..
})
)
{
// Otherwise we complain about the number of arguments *first*,
// a closing parenthesis should be more obvious.
//
// Vararg functions need at least one argument.
let err = if f.arity().map(|arity| i < arity).unwrap_or(i == 0) {
ErrorKind::TooFewArgs
} else {
ErrorKind::TooManyArgs
};
let mut err_pos_len = None;
if let Some(first_err) = first_err {
let mut len = self.pos - first_err;
if !matches!(self.current, Token::Close) {
// TODO: Rationalize where we put the cursor exactly.
// If we have a closing paren it's on it, if we don't it's before the number.
len += 1;
}
if let Token::End = self.current {
// Don't place a caret after the end of string
len -= 1;
}
err_pos_len = Some((first_err, len));
}
self.set_error(err, err_pos_len);
}
NAN
}
Token::Open => {
self.next_token();
let ret = self.expr();
if let Token::Close = self.current {
self.next_token();
return ret;
}
if !matches!(self.current, Token::Error | Token::End) && self.error.is_none() {
self.set_error(ErrorKind::TooManyArgs, None)
} else if self.no_specific_error() {
self.set_error(ErrorKind::MissingClosingParen, None)
}
NAN
}
Token::End => {
// The expression ended before we expected it.
// e.g. `2 - `.
// This means we have too few things.
// Instead of introducing another error, just call it
// "too few args".
self.set_error(ErrorKind::TooFewArgs, None);
NAN
}
Token::Null | Token::Error | Token::Sep | Token::Close | Token::Infix(_) => {
if self.no_specific_error() {
self.set_error(ErrorKind::UnexpectedToken, None);
}
NAN
}
}
}
/// ```
/// <power> = {("-" | "+")} <base>
/// ```
fn power(&mut self) -> f64 {
let mut sign = 1.0;
while let Token::Infix(op) = self.current {
if op == Operator::Sub {
sign = -sign;
self.next_token();
} else if op == Operator::Add {
self.next_token();
} else {
break;
}
}
sign * self.base()
}
/// ```
/// <factor> = <power> {"^" <power>}
/// ```
fn factor(&mut self) -> f64 {
let mut ret = self.power();
if let Token::Infix(Operator::Pow) = self.current {
self.next_token();
ret = ret.powf(self.factor());
}
ret
}
/// ```
/// <term> = <factor> {("*" | "/" | "%") <factor>}
/// ```
fn term(&mut self) -> f64 {
let mut ret = self.factor();
while let Token::Infix(op @ (Operator::Mul | Operator::Div | Operator::Rem)) = self.current
{
let op_pos = self.pos - 1;
self.next_token();
let ret2 = self.factor();
if ret2 == 0.0 && [Operator::Div, Operator::Rem].contains(&op) {
// Division by zero (also for modulo)
// Error position is the "/" or "%" sign for now
self.set_error(ErrorKind::DivByZero, Some((op_pos, 1)));
}
ret = op.eval(ret, ret2);
}
ret
}
/// ```
/// <expr> = <term> {("+" | "-") <term>}
/// ```
fn expr(&mut self) -> f64 {
let mut ret = self.term();
while let Token::Infix(op @ (Operator::Add | Operator::Sub)) = self.current {
self.next_token();
ret = op.eval(ret, self.term());
}
ret
}
}
pub fn te_interp(expression: &wstr) -> Result<f64, Error> {
let mut s = State::new(expression);
let ret = s.eval();
match s.error() {
Ok(()) => Ok(ret),
Err(e) => Err(e),
}
}