Port math builtin, tinyexpr and wcstod_underscores to Rust

This commit is contained in:
Xiretza 2023-04-15 11:40:38 +00:00 committed by Johannes Altmanninger
parent cc744d30c0
commit aab2f660a7
16 changed files with 1171 additions and 1056 deletions

View file

@ -105,7 +105,7 @@ set(FISH_BUILTIN_SRCS
src/builtins/disown.cpp
src/builtins/eval.cpp src/builtins/fg.cpp
src/builtins/function.cpp src/builtins/functions.cpp src/builtins/history.cpp
src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/path.cpp
src/builtins/jobs.cpp src/builtins/path.cpp
src/builtins/read.cpp src/builtins/set.cpp
src/builtins/set_color.cpp src/builtins/source.cpp src/builtins/status.cpp
src/builtins/string.cpp src/builtins/test.cpp src/builtins/ulimit.cpp
@ -123,7 +123,7 @@ set(FISH_SRCS
src/pager.cpp src/parse_execution.cpp src/parse_util.cpp
src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp
src/proc.cpp src/re.cpp src/reader.cpp src/screen.cpp
src/signals.cpp src/tinyexpr.cpp src/utf8.cpp
src/signals.cpp src/utf8.cpp
src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp
src/wutil.cpp src/fds.cpp src/rustffi.cpp
)

View file

@ -0,0 +1,318 @@
use libc::c_int;
use std::borrow::Cow;
use widestring_suffix::widestrs;
use super::shared::{
builtin_missing_argument, builtin_print_help, io_streams_t, BUILTIN_ERR_COMBO2,
BUILTIN_ERR_MIN_ARG_COUNT1, STATUS_CMD_ERROR, STATUS_CMD_OK, STATUS_INVALID_ARGS,
};
use crate::common::{read_blocked, str2wcstring};
use crate::ffi::parser_t;
use crate::tinyexpr::te_interp;
use crate::wchar::{wstr, WString};
use crate::wgetopt::{wgetopter_t, wopt, woption, woption_argument_t};
use crate::wutil::{fish_wcstoi, perror, sprintf, wgettext_fmt};
/// The maximum number of points after the decimal that we'll print.
/// This is the initial value of `Options::scale` when no `--scale` option is given.
const DEFAULT_SCALE: usize = 6;
/// The end of the range such that every integer is representable as a double.
/// i.e. this is the first value such that x + 1 == x (or == x + 2, depending on rounding mode).
/// Computed as 2 to the power of `f64::MANTISSA_DIGITS`; `evaluate_expression` rejects results
/// whose magnitude reaches this value.
const MAX_CONTIGUOUS_INTEGER: f64 = (1_u64 << f64::MANTISSA_DIGITS) as f64;
/// Options accepted by the `math` builtin, as filled in by `parse_cmd_opts`.
struct Options {
    /// Set when `-h`/`--help` was given.
    print_help: bool,
    /// Number of digits printed after the decimal point (`-s`/`--scale`; `max` selects 15).
    scale: usize,
    /// Output base (`-b`/`--base`); starts at 10, `hex` selects 16 and `octal` selects 8.
    base: usize,
}
/// Parses the leading option flags of a `math` invocation.
///
/// On success returns the parsed [`Options`] together with the index of the first argument that
/// belongs to the expression. On failure an error has already been written to `streams.err` and
/// the status to return from the builtin is given in `Err`.
///
/// Accepted values:
/// - `--scale`: `max` (meaning 15) or an integer in `0..=15`.
/// - `--base`: `hex`, `octal`, `16` or `8`.
/// - A non-zero scale may not be combined with a base other than 10.
#[widestrs]
fn parse_cmd_opts(
    args: &mut [&wstr],
    parser: &mut parser_t,
    streams: &mut io_streams_t,
) -> Result<(Options, usize), Option<c_int>> {
    const cmd: &wstr = "math"L;
    let print_hints = true;
    // This command is atypical in using the "+" (REQUIRE_ORDER) option for flag parsing.
    // This is needed because of the minus, `-`, operator in math expressions.
    const SHORT_OPTS: &wstr = "+:hs:b:"L;
    const LONG_OPTS: &[woption] = &[
        wopt("scale"L, woption_argument_t::required_argument, 's'),
        wopt("base"L, woption_argument_t::required_argument, 'b'),
        wopt("help"L, woption_argument_t::no_argument, 'h'),
    ];
    let mut opts = Options {
        print_help: false,
        scale: DEFAULT_SCALE,
        base: 10,
    };
    let mut have_scale = false;
    let mut w = wgetopter_t::new(SHORT_OPTS, LONG_OPTS, args);
    while let Some(c) = w.wgetopt_long() {
        match c {
            's' => {
                let optarg = w.woptarg.unwrap();
                have_scale = true;
                // "max" is the special value that tells us to pick the maximum scale.
                opts.scale = if optarg == "max"L {
                    15
                } else {
                    match fish_wcstoi(optarg) {
                        // The guard proves the value is in [0, 15], so the cast is lossless.
                        Ok(scale) if (0..=15).contains(&scale) => scale as usize,
                        _ => {
                            streams.err.append(wgettext_fmt!(
                                "%ls: %ls: invalid scale value\n",
                                cmd,
                                optarg
                            ));
                            return Err(STATUS_INVALID_ARGS);
                        }
                    }
                };
            }
            'b' => {
                let optarg = w.woptarg.unwrap();
                opts.base = if optarg == "hex"L {
                    16
                } else if optarg == "octal"L {
                    8
                } else {
                    match fish_wcstoi(optarg) {
                        // Only the bases that `format_double` knows how to print are accepted.
                        Ok(base) if base == 8 || base == 16 => base as usize,
                        _ => {
                            streams.err.append(wgettext_fmt!(
                                "%ls: %ls: invalid base value\n",
                                cmd,
                                optarg
                            ));
                            return Err(STATUS_INVALID_ARGS);
                        }
                    }
                };
            }
            'h' => {
                opts.print_help = true;
            }
            ':' => {
                builtin_missing_argument(parser, streams, cmd, args[w.woptind - 1], print_hints);
                return Err(STATUS_INVALID_ARGS);
            }
            '?' => {
                // For most commands this is an error. We ignore it because a math expression
                // can begin with a minus sign.
                return Ok((opts, w.woptind - 1));
            }
            _ => {
                panic!("unexpected retval from wgeopter.next()");
            }
        }
    }
    if have_scale && opts.scale != 0 && opts.base != 10 {
        // Keep the message on a single line: a line break inside the literal would end up in
        // the middle of the printed error.
        streams.err.append(wgettext_fmt!(
            BUILTIN_ERR_COMBO2,
            cmd,
            "non-zero scale value only valid for base 10"
        ));
        return Err(STATUS_INVALID_ARGS);
    }
    Ok((opts, w.woptind))
}
/// We read from stdin if we are the second or later process in a pipeline.
/// This is decided solely by asking `streams` whether stdin is directly redirected.
fn use_args_from_stdin(streams: &io_streams_t) -> bool {
    streams.stdin_is_directly_redirected()
}
/// Reads one line from stdin, one byte at a time, and returns it without the newline.
///
/// Returns `None` on a read error (after reporting it via `perror`) or when end-of-file is hit
/// before any byte was read. A final line that lacks a trailing newline is still returned.
fn get_arg_from_stdin(streams: &io_streams_t) -> Option<WString> {
    let mut line = Vec::new();
    loop {
        let mut chunk = [0];
        let nread = read_blocked(streams.stdin_fd().unwrap(), &mut chunk);
        if nread < 0 {
            // error
            perror("read");
            return None;
        }
        if nread == 0 {
            // EOF: only a non-empty partial line counts as an argument.
            if line.is_empty() {
                return None;
            }
            break;
        }
        if nread != 1 {
            panic!("Unexpected return value from read_blocked(): {nread}");
        }
        let [byte] = chunk;
        if byte == b'\n' {
            // we're done
            break;
        }
        line.push(byte);
    }
    Some(str2wcstring(&line))
}
/// Fetches the next expression fragment, either from stdin (one line per call) or from `args`
/// at `*argidx`, depending on the execution context. This mimics how builtin `string` does it.
///
/// When reading from `args`, `*argidx` is advanced on every call, even once it is out of range.
/// Returns `None` when there is nothing left to read.
fn get_arg<'args>(
    argidx: &mut usize,
    args: &'args [&'args wstr],
    streams: &io_streams_t,
) -> Option<Cow<'args, wstr>> {
    if !use_args_from_stdin(streams) {
        let current = args.get(*argidx).copied().map(Cow::Borrowed);
        *argidx += 1;
        return current;
    }

    assert!(
        streams.stdin_fd().is_some(),
        "stdin should not be closed since it is directly redirected"
    );
    get_arg_from_stdin(streams).map(Cow::Owned)
}
/// Return a formatted version of the value `v` respecting the given `opts`.
///
/// - Base 16 and base 8 truncate `v` towards zero and print it as `0x…` / `0…` with a leading
///   `-` for negative values. Zero prints as `0x0` and `0` respectively.
/// - Otherwise `v` is printed with `opts.scale` digits after the decimal point; a scale of 0
///   truncates instead of rounding, and trailing zeros (and a dangling `.`) are trimmed.
fn format_double(mut v: f64, opts: &Options) -> WString {
    if opts.base == 16 {
        v = v.trunc();
        // Compare against zero instead of inspecting the sign bit: truncating e.g. -0.5 yields
        // -0.0, which must print as "0x0" (like the octal branch prints "0"), not "-0x0".
        let mneg = if v < 0.0 { "-" } else { "" };
        return sprintf!("%s0x%lx", mneg, v.abs() as u64);
    } else if opts.base == 8 {
        v = v.trunc();
        if v == 0.0 {
            // not 00
            return WString::from_str("0");
        }
        let mneg = if v < 0.0 { "-" } else { "" };
        return sprintf!("%s0%lo", mneg, v.abs() as u64);
    }
    // As a special-case, a scale of 0 means to truncate to an integer
    // instead of rounding.
    if opts.scale == 0 {
        v = v.trunc();
        return sprintf!("%.*f", opts.scale, v);
    }
    let mut ret = sprintf!("%.*f", opts.scale, v);
    // If we contain a decimal separator, trim trailing zeros after it, and then the separator
    // itself if there's nothing after it. Detect a decimal separator as a non-digit.
    if ret.chars().any(|c| !c.is_ascii_digit()) {
        let trailing_zeroes = ret.chars().rev().take_while(|&c| c == '0').count();
        let mut to_keep = ret.len() - trailing_zeroes;
        if ret.as_char_slice()[to_keep - 1] == '.' {
            to_keep -= 1;
        }
        ret.truncate(to_keep);
    }
    // If we trimmed everything it must have just been zero.
    // TODO: can this ever happen?
    if ret.is_empty() {
        ret.push('0');
    }
    ret
}
/// Evaluates `expression` and prints either the formatted result to `streams.out` or a
/// diagnostic to `streams.err`.
///
/// Parse errors are reported with the offending expression and a caret line marking the error
/// span. Results that are infinite, NaN or too large in magnitude are reported as runtime errors.
/// Returns `STATUS_CMD_OK` on success and `STATUS_CMD_ERROR` otherwise.
#[widestrs]
fn evaluate_expression(
    cmd: &wstr,
    streams: &mut io_streams_t,
    opts: &Options,
    expression: &wstr,
) -> Option<c_int> {
    let n = match te_interp(expression) {
        Ok(value) => value,
        Err(err) => {
            streams.err.append(sprintf!(
                "%ls: Error: %ls\n"L,
                cmd,
                err.kind.describe_wstr()
            ));
            streams.err.append(sprintf!("'%ls'\n"L, expression));
            // One extra column accounts for the opening quote printed before the expression.
            let padding = WString::from_chars(vec![' '; err.position + 1]);
            let marker = if err.len >= 2 {
                let tildes = WString::from_chars(vec!['~'; err.len - 2]);
                sprintf!("%ls^%ls^\n"L, padding, tildes)
            } else {
                sprintf!("%ls^\n"L, padding)
            };
            streams.err.append(marker);
            return STATUS_CMD_ERROR;
        }
    };

    // Check some runtime errors after the fact.
    // TODO: Really, this should be done in tinyexpr
    // (e.g. infinite is the result of "x / 0"),
    // but that's much more work.
    let runtime_error: Option<&wstr> = if n.is_infinite() {
        Some("Result is infinite"L)
    } else if n.is_nan() {
        Some("Result is not a number"L)
    } else if n.abs() >= MAX_CONTIGUOUS_INTEGER {
        Some("Result magnitude is too large"L)
    } else {
        None
    };

    let Some(error_message) = runtime_error else {
        let mut formatted = format_double(n, opts);
        formatted.push('\n');
        streams.out.append(formatted);
        return STATUS_CMD_OK;
    };

    streams
        .err
        .append(sprintf!("%ls: Error: %ls\n"L, cmd, error_message));
    streams.err.append(sprintf!("'%ls'\n"L, expression));
    STATUS_CMD_ERROR
}
/// Entry point of the `math` builtin: parses options, joins the remaining arguments (or the
/// lines read from stdin) with single spaces into one expression, and evaluates it.
#[widestrs]
pub fn math(
    parser: &mut parser_t,
    streams: &mut io_streams_t,
    argv: &mut [&wstr],
) -> Option<c_int> {
    let cmd = argv[0];

    let (opts, mut optind) = match parse_cmd_opts(argv, parser, streams) {
        Ok(parsed) => parsed,
        Err(status) => return status,
    };

    if opts.print_help {
        builtin_print_help(parser, streams, cmd);
        return STATUS_CMD_OK;
    }

    let mut expression = WString::new();
    loop {
        let Some(arg) = get_arg(&mut optind, argv, streams) else {
            break;
        };
        if !expression.is_empty() {
            expression.push(' ')
        }
        expression.push_utfstr(&arg);
    }

    if !expression.is_empty() {
        return evaluate_expression(cmd, streams, &opts, &expression);
    }

    streams
        .err
        .append(wgettext_fmt!(BUILTIN_ERR_MIN_ARG_COUNT1, cmd, 1, 0));
    STATUS_CMD_ERROR
}

View file

@ -9,6 +9,7 @@ pub mod contains;
pub mod echo;
pub mod emit;
pub mod exit;
pub mod math;
pub mod printf;
pub mod pwd;
pub mod random;

View file

@ -175,6 +175,7 @@ pub fn run_builtin(
RustBuiltin::Echo => super::echo::echo(parser, streams, args),
RustBuiltin::Emit => super::emit::emit(parser, streams, args),
RustBuiltin::Exit => super::exit::exit(parser, streams, args),
RustBuiltin::Math => super::math::math(parser, streams, args),
RustBuiltin::Pwd => super::pwd::pwd(parser, streams, args),
RustBuiltin::Random => super::random::random(parser, streams, args),
RustBuiltin::Realpath => super::realpath::realpath(parser, streams, args),

View file

@ -50,6 +50,7 @@ mod smoke;
mod termsize;
mod threads;
mod timer;
mod tinyexpr;
mod tokenizer;
mod topic_monitor;
mod trace;

707
fish-rust/src/tinyexpr.rs Normal file
View file

@ -0,0 +1,707 @@
/*
* TINYEXPR - Tiny recursive descent parser and evaluation engine in C
*
* Copyright (c) 2015, 2016 Lewis Van Winkle
*
* http://CodePlea.com
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgement in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
// This version has been altered and ported to C++, then to Rust, for inclusion in fish.
use std::{
f64::{
consts::{E, PI, TAU},
INFINITY, NAN, NEG_INFINITY,
},
fmt::Debug,
ops::{BitAnd, BitOr, BitXor},
};
use widestring_suffix::widestrs;
use crate::{
wchar::wstr,
wutil::{wcstod::wcstod_underscores, wgettext},
};
/// A named entry of the expression language: either a constant or a function of fixed or
/// variable arity.
#[derive(Clone, Copy)]
enum Function {
    /// A plain value such as `pi`.
    Constant(f64),
    /// A function without arguments.
    Fn0(fn() -> f64),
    /// A function of exactly one argument.
    Fn1(fn(f64) -> f64),
    /// A function of exactly two arguments.
    Fn2(fn(f64, f64) -> f64),
    /// A function taking any number of arguments.
    FnN(fn(&[f64]) -> f64),
}

impl Debug for Function {
    /// Constants show their value; function pointers are elided as `_`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Function::Constant(value) => {
                f.debug_tuple("Function::Constant").field(value).finish()
            }
            Function::Fn0(_) => f.write_str("Function::Fn0(_)"),
            Function::Fn1(_) => f.write_str("Function::Fn1(_)"),
            Function::Fn2(_) => f.write_str("Function::Fn2(_)"),
            Function::FnN(_) => f.write_str("Function::FnN(_)"),
        }
    }
}

impl Function {
    /// Number of arguments this entry expects, or `None` if it accepts any number.
    pub fn arity(&self) -> Option<usize> {
        match self {
            Function::Constant(_) | Function::Fn0(_) => Some(0),
            Function::Fn1(_) => Some(1),
            Function::Fn2(_) => Some(2),
            Function::FnN(_) => None,
        }
    }

    /// Evaluates the entry with `args`.
    ///
    /// # Panics
    /// Panics if `args.len()` does not match a fixed arity.
    pub fn call(&self, args: &[f64]) -> f64 {
        match *self {
            Function::Constant(value) if args.is_empty() => value,
            Function::Fn0(func) if args.is_empty() => func(),
            Function::Fn1(func) if args.len() == 1 => func(args[0]),
            Function::Fn2(func) if args.len() == 2 => func(args[0], args[1]),
            Function::FnN(func) => func(args),
            _ => panic!("Incorrect number of arguments for function call"),
        }
    }
}
/// The category of a failure encountered while parsing or evaluating an expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorKind {
    UnknownFunction,
    MissingClosingParen,
    MissingOpenParen,
    TooFewArgs,
    TooManyArgs,
    MissingOperator,
    UnexpectedToken,
    LogicalOperator,
    DivByZero,
    Unknown,
}

#[widestrs]
impl ErrorKind {
    /// Returns the (translated) user-facing description of this error category.
    pub fn describe_wstr(&self) -> &'static wstr {
        match *self {
            Self::UnknownFunction => wgettext!("Unknown function"),
            Self::MissingClosingParen => wgettext!("Missing closing parenthesis"),
            Self::MissingOpenParen => wgettext!("Missing opening parenthesis"),
            Self::TooFewArgs => wgettext!("Too few arguments"),
            Self::TooManyArgs => wgettext!("Too many arguments"),
            Self::MissingOperator => wgettext!("Missing operator"),
            Self::UnexpectedToken => wgettext!("Unexpected token"),
            Self::LogicalOperator => {
                wgettext!("Logical operations are not supported, use `test` instead")
            }
            Self::DivByZero => wgettext!("Division by zero"),
            Self::Unknown => wgettext!("Expression is bogus"),
        }
    }
}
/// A parse or evaluation error together with the span of the expression it refers to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Error {
    /// What went wrong.
    pub kind: ErrorKind,
    /// Index (in characters) into the expression where the error span starts.
    pub position: usize,
    /// Length of the error span in characters; may be 0 when only a position is known.
    pub len: usize,
}
/// The binary infix operators understood by the expression parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Operator {
    Add,
    Sub,
    Mul,
    Div,
    Pow,
    Rem,
}

impl Operator {
    /// Applies the operator to the operands `a` (left) and `b` (right).
    pub fn eval(&self, a: f64, b: f64) -> f64 {
        match *self {
            Self::Add => a + b,
            Self::Sub => a - b,
            Self::Mul => a * b,
            Self::Div => a / b,
            Self::Pow => a.powf(b),
            Self::Rem => a % b,
        }
    }
}
/// A lexical token of a math expression, as produced by `State::get_token`.
#[derive(Debug, Clone, Copy)]
enum Token {
    // NOTE(review): never produced by the code visible here; it is only matched in `base`.
    Null,
    /// Tokenizing or parsing failed; details are in `State::error`.
    Error,
    /// End of input.
    End,
    /// The argument separator `,`.
    Sep,
    /// An opening parenthesis `(`.
    Open,
    /// A closing parenthesis `)`.
    Close,
    /// A numeric literal.
    Number(f64),
    /// A known builtin function or constant.
    Function(Function),
    /// An infix operator.
    Infix(Operator),
}
/// Tokenizer and recursive descent parser state for a single expression.
struct State<'s> {
    /// The complete expression being parsed.
    start: &'s wstr,
    /// Index (in characters) into `start` of the first character not yet consumed.
    pos: usize,
    /// The most recently read token.
    current: Token,
    /// The recorded error, if any.
    error: Option<Error>,
}
/// Applies the integer operation `f` to `a` and `b` after converting both to `u64` with `as`,
/// and converts the result back to `f64`.
fn bitwise_op(a: f64, b: f64, f: fn(u64, u64) -> u64) -> f64 {
    // TODO: bounds checks (on the operands and on the result)
    f(a as u64, b as u64) as f64
}
/// Computes the factorial of `n`, truncated towards zero to an integer.
///
/// Returns NaN for negative or NaN input, and infinity when `n` or the result does not fit
/// into a `u64`.
fn fac(n: f64) -> f64 {
    // NaN compares false against everything, so it has to be rejected explicitly; otherwise it
    // would be cast to 0 below and silently yield 1.
    if n.is_nan() || n < 0.0 {
        return NAN;
    }
    if n > (u64::MAX as f64) {
        return INFINITY;
    }
    let n = n as u64;
    (1..=n)
        .try_fold(1_u64, |acc, i| acc.checked_mul(i))
        .map_or(INFINITY, |x| x as f64)
}
/// Returns the largest value in `n`, or negative infinity for an empty slice.
/// The first NaN encountered wins, and `+0` is considered larger than `-0`.
fn maximum(n: &[f64]) -> f64 {
    let mut best = NEG_INFINITY;
    for &candidate in n {
        if best.is_nan() {
            // Once NaN, always NaN.
            break;
        }
        best = if candidate.is_nan() {
            candidate
        } else if best == candidate {
            // treat +0 as larger than -0
            if best.is_sign_positive() {
                best
            } else {
                candidate
            }
        } else if best > candidate {
            best
        } else {
            candidate
        };
    }
    best
}
/// Returns the smallest value in `n`, or positive infinity for an empty slice.
/// The first NaN encountered wins, and `-0` is considered smaller than `+0`.
fn minimum(n: &[f64]) -> f64 {
    let mut best = INFINITY;
    for &candidate in n {
        if best.is_nan() {
            // Once NaN, always NaN.
            break;
        }
        best = if candidate.is_nan() {
            candidate
        } else if best == candidate {
            // treat -0 as smaller than +0
            if best.is_sign_negative() {
                best
            } else {
                candidate
            }
        } else if best < candidate {
            best
        } else {
            candidate
        };
    }
    best
}
/// Computes the binomial coefficient "n choose r" on the integer parts of `n` and `r`.
///
/// Returns NaN if either argument is negative or `n < r`, and infinity if `n` is NaN, if an
/// argument exceeds the `u64` range, or if an intermediate product overflows.
fn ncr(n: f64, r: f64) -> f64 {
    // Doing this for NAN takes ages - just return the result right away.
    if n.is_nan() {
        return INFINITY;
    }
    if n < 0.0 || r < 0.0 || n < r {
        return NAN;
    }
    let limit = u64::MAX as f64;
    if n > limit || r > limit {
        return INFINITY;
    }

    let total = n as u64;
    let picked = r as u64;
    // C(n, r) == C(n, n - r); iterate over the smaller of the two.
    let steps = if picked > total / 2 {
        total - picked
    } else {
        picked
    };
    let offset = total - steps;

    (1..=steps)
        .try_fold(1_u64, |acc, i| {
            acc.checked_mul(offset + i).map(|product| product / i)
        })
        .map_or(INFINITY, |result| result as f64)
}
/// Computes the number of `r`-permutations of `n` items as `ncr(n, r) * fac(r)`.
fn npr(n: f64, r: f64) -> f64 {
    let combinations = ncr(n, r);
    let orderings = fac(r);
    combinations * orderings
}
/// Table of all constants and functions available in expressions, keyed by name.
/// `find_builtin` performs a binary search on it, so it has to stay sorted by name.
#[widestrs]
const BUILTINS: &[(&wstr, Function)] = &[
    // must be in alphabetical order
    ("abs"L, Function::Fn1(f64::abs)),
    ("acos"L, Function::Fn1(f64::acos)),
    ("asin"L, Function::Fn1(f64::asin)),
    ("atan"L, Function::Fn1(f64::atan)),
    ("atan2"L, Function::Fn2(f64::atan2)),
    (
        "bitand"L,
        Function::Fn2(|a, b| bitwise_op(a, b, BitAnd::bitand)),
    ),
    (
        "bitor"L,
        Function::Fn2(|a, b| bitwise_op(a, b, BitOr::bitor)),
    ),
    (
        "bitxor"L,
        Function::Fn2(|a, b| bitwise_op(a, b, BitXor::bitxor)),
    ),
    ("ceil"L, Function::Fn1(f64::ceil)),
    ("cos"L, Function::Fn1(f64::cos)),
    ("cosh"L, Function::Fn1(f64::cosh)),
    ("e"L, Function::Constant(E)),
    ("exp"L, Function::Fn1(f64::exp)),
    ("fac"L, Function::Fn1(fac)),
    ("floor"L, Function::Fn1(f64::floor)),
    ("ln"L, Function::Fn1(f64::ln)),
    // Note: "log" is the base-10 logarithm, identical to "log10".
    ("log"L, Function::Fn1(f64::log10)),
    ("log10"L, Function::Fn1(f64::log10)),
    ("log2"L, Function::Fn1(f64::log2)),
    ("max"L, Function::FnN(maximum)),
    ("min"L, Function::FnN(minimum)),
    ("ncr"L, Function::Fn2(ncr)),
    ("npr"L, Function::Fn2(npr)),
    ("pi"L, Function::Constant(PI)),
    ("pow"L, Function::Fn2(f64::powf)),
    ("round"L, Function::Fn1(f64::round)),
    ("sin"L, Function::Fn1(f64::sin)),
    ("sinh"L, Function::Fn1(f64::sinh)),
    ("sqrt"L, Function::Fn1(f64::sqrt)),
    ("tan"L, Function::Fn1(f64::tan)),
    ("tanh"L, Function::Fn1(f64::tanh)),
    ("tau"L, Function::Constant(TAU)),
];
// Presumably verifies the ordering requirement above on tuple field 0 (the name) -- the macro
// is defined outside this file; confirm there.
assert_sorted_by_name!(BUILTINS, 0);
/// Looks up the builtin constant or function called `name` in the sorted `BUILTINS` table.
/// Returns `None` if there is no entry with exactly that name.
fn find_builtin(name: &wstr) -> Option<Function> {
    BUILTINS
        .binary_search_by_key(&name, |(builtin_name, _function)| builtin_name)
        .ok()
        .map(|idx| BUILTINS[idx].1)
}
impl<'s> State<'s> {
pub fn new(input: &'s wstr) -> Self {
let mut state = Self {
start: input,
pos: 0,
current: Token::End,
error: None,
};
state.next_token();
state
}
pub fn error(&self) -> Result<(), Error> {
if let Token::End = self.current {
Ok(())
} else if let Some(error) = self.error {
Err(error)
} else {
// If we're not at the end but there's no error, then that means we have a
// superfluous token that we have no idea what to do with.
Err(Error {
kind: ErrorKind::TooManyArgs,
position: self.pos,
len: 0,
})
}
}
/// Parses and evaluates the expression. The value is only meaningful if `error()`
/// subsequently returns `Ok`.
pub fn eval(&mut self) -> f64 {
    self.expr()
}
/// Records an error of the given `kind` and switches the current token to `Token::Error`.
/// `pos_len` is the (position, length) span of the error; it defaults to the current position
/// with length 0.
fn set_error(&mut self, kind: ErrorKind, pos_len: Option<(usize, usize)>) {
    let (position, len) = match pos_len {
        Some(span) => span,
        None => (self.pos, 0),
    };
    self.error = Some(Error {
        kind,
        position,
        len,
    });
    self.current = Token::Error;
}
fn no_specific_error(&self) -> bool {
!matches!(self.current, Token::Error)
|| matches!(
self.error,
Some(Error {
kind: ErrorKind::Unknown,
..
})
)
}
/// Tries to get the next token from the input. If the input does not contain enough data for
/// another token, `None` is returned. Otherwise, the number of consumed characters is returned
/// along with either the token, or `None` in case of ignored (whitespace) input.
fn get_token(&mut self) -> Option<(usize, Option<Token>)> {
debug_assert!(!matches!(self.current, Token::Error));
let next = &self.start.as_char_slice().get(self.pos..)?;
// Try reading a number.
if matches!(next.first(), Some('0'..='9') | Some('.')) {
let mut consumed = 0;
let num = wcstod_underscores(*next, &mut consumed).unwrap();
Some((consumed, Some(Token::Number(num))))
} else {
// Look for a function call.
// But not when it's an "x" followed by whitespace
// - that's the alternative multiplication operator.
if next.first()?.is_ascii_lowercase()
&& !(*next.first()? == 'x' && next.len() > 1 && next[1].is_whitespace())
{
let ident_len = next
.iter()
.position(|&c| !(c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'))
.unwrap_or(next.len());
let ident = &next[..ident_len];
if let Some(var) = find_builtin(wstr::from_char_slice(ident)) {
return Some((ident_len, Some(Token::Function(var))));
} else if self.no_specific_error() {
// Our error is more specific, so it takes precedence.
self.set_error(ErrorKind::UnknownFunction, Some((self.pos, ident_len)));
}
Some((ident_len, Some(Token::Error)))
} else {
// Look for an operator or special character.
let tok = match next.first()? {
'+' => Token::Infix(Operator::Add),
'-' => Token::Infix(Operator::Sub),
'x' | '*' => Token::Infix(Operator::Mul),
'/' => Token::Infix(Operator::Div),
'^' => Token::Infix(Operator::Pow),
'%' => Token::Infix(Operator::Rem),
'(' => Token::Open,
')' => Token::Close,
',' => Token::Sep,
' ' | '\t' | '\n' | '\r' => return Some((1, None)),
'=' | '>' | '<' | '&' | '|' | '!' => {
self.set_error(ErrorKind::LogicalOperator, None);
Token::Error
}
_ => {
self.set_error(ErrorKind::MissingOperator, None);
Token::Error
}
};
Some((1, Some(tok)))
}
}
}
/// Advances to the next token, skipping ignored (whitespace) input. `current` becomes
/// `Token::End` once the input is exhausted.
fn next_token(&mut self) {
    loop {
        match self.get_token() {
            None => {
                self.current = Token::End;
                return;
            }
            Some((consumed, token)) => {
                self.pos += consumed;
                if let Some(token) = token {
                    self.current = token;
                    return;
                }
            }
        }
    }
}
/// Parses and evaluates a `<base>`: a number, a constant or function call, or a parenthesized
/// expression. On error, records it via `set_error` and returns NaN.
///
/// ```text
/// <base> = <constant> |
/// <function-0> {"(" ")"} |
/// <function-1> <power> |
/// <function-X> "(" <expr> {"," <expr>} ")" |
/// "(" <list> ")"
/// ```
fn base(&mut self) -> f64 {
    match self.current {
        Token::Number(n) => {
            // Position just past the number, used to point at the gap if an operator is missing.
            let after_first = self.pos;
            self.next_token();
            if let Token::Number(_) | Token::Function(_) = self.current {
                // Two numbers after each other:
                // math '5 2'
                // math '3 pi'
                // (of course 3 pi could also be interpreted as 3 x pi)
                // The error should be given *between*
                // the last two tokens.
                let num_whitespace = self.start[after_first..]
                    .chars()
                    .take_while(|&c| " \t\n\r".contains(c))
                    .count();
                self.set_error(
                    ErrorKind::MissingOperator,
                    Some((after_first, num_whitespace)),
                );
            }
            n
        }
        Token::Function(f) => {
            self.next_token();
            let have_open = matches!(self.current, Token::Open);
            if have_open {
                // If we *have* an opening parenthesis,
                // we need to consume it and
                // expect a closing one.
                self.next_token();
            }
            if f.arity() == Some(0) {
                // Constants and nullary functions: the parentheses are optional, but if opened
                // they must be closed right away.
                if have_open {
                    if let Token::Close = self.current {
                        self.next_token();
                    } else if self.no_specific_error() {
                        self.set_error(ErrorKind::MissingClosingParen, None);
                    }
                }
                return match f {
                    Function::Fn0(f) => f(),
                    Function::Constant(n) => n,
                    _ => unreachable!("unhandled function type with arity 0"),
                };
            }
            let mut parameters = vec![];
            // Number of argument separators consumed so far.
            let mut i = 0;
            // Start of the error span, set when we begin parsing the first surplus argument.
            let mut first_err = None;
            for j in 0.. {
                if f.arity() == Some(j) {
                    first_err = Some(self.pos - 1);
                }
                parameters.push(self.expr());
                if !matches!(self.current, Token::Sep) {
                    break;
                }
                self.next_token();
                i += 1;
            }
            if f.arity().is_none() || f.arity() == Some(i + 1) {
                if !have_open {
                    return f.call(&parameters);
                }
                if let Token::Close = self.current {
                    // We have an opening and a closing paren, consume the closing one and done.
                    self.next_token();
                    return f.call(&parameters);
                }
                if !matches!(self.current, Token::Error) {
                    // If we had the right number of arguments, we're missing a closing paren.
                    self.set_error(ErrorKind::MissingClosingParen, None);
                }
            }
            if !matches!(self.current, Token::Error)
                || matches!(
                    self.error,
                    Some(Error {
                        kind: ErrorKind::UnexpectedToken,
                        ..
                    })
                )
            {
                // Otherwise we complain about the number of arguments *first*,
                // a closing parenthesis should be more obvious.
                //
                // Vararg functions need at least one argument.
                let err = if f.arity().map(|arity| i < arity).unwrap_or(i == 0) {
                    ErrorKind::TooFewArgs
                } else {
                    ErrorKind::TooManyArgs
                };
                let mut err_pos_len = None;
                if let Some(first_err) = first_err {
                    let mut len = self.pos - first_err;
                    if !matches!(self.current, Token::Close) {
                        // TODO: Rationalize where we put the cursor exactly.
                        // If we have a closing paren it's on it, if we don't it's before the number.
                        len += 1;
                    }
                    if let Token::End = self.current {
                        // Don't place a caret after the end of string
                        len -= 1;
                    }
                    err_pos_len = Some((first_err, len));
                }
                self.set_error(err, err_pos_len);
            }
            NAN
        }
        Token::Open => {
            self.next_token();
            let ret = self.expr();
            if let Token::Close = self.current {
                self.next_token();
                return ret;
            }
            // No closing parenthesis: either an unexpected token follows the inner expression,
            // or the input ended / failed without a more specific error.
            if !matches!(self.current, Token::Error | Token::End) && self.error.is_none() {
                self.set_error(ErrorKind::TooManyArgs, None)
            } else if self.no_specific_error() {
                self.set_error(ErrorKind::MissingClosingParen, None)
            }
            NAN
        }
        Token::End => {
            // The expression ended before we expected it.
            // e.g. `2 - `.
            // This means we have too few things.
            // Instead of introducing another error, just call it
            // "too few args".
            self.set_error(ErrorKind::TooFewArgs, None);
            NAN
        }
        Token::Null | Token::Error | Token::Sep | Token::Close | Token::Infix(_) => {
            // Anything else cannot start a <base>; keep an already recorded specific error.
            if self.no_specific_error() {
                self.set_error(ErrorKind::UnexpectedToken, None);
            }
            NAN
        }
    }
}
/// Parses any number of leading `+`/`-` signs followed by a `<base>`; every `-` flips the sign
/// of the result.
///
/// ```text
/// <power> = {("-" | "+")} <base>
/// ```
fn power(&mut self) -> f64 {
    let mut negate = false;
    loop {
        match self.current {
            Token::Infix(Operator::Sub) => negate = !negate,
            Token::Infix(Operator::Add) => {}
            _ => break,
        }
        self.next_token();
    }
    let sign = if negate { -1.0 } else { 1.0 };
    sign * self.base()
}
/// Parses exponentiation. The right-hand side recurses into `factor`, so `^` associates to the
/// right.
///
/// ```text
/// <factor> = <power> {"^" <power>}
/// ```
fn factor(&mut self) -> f64 {
    let base = self.power();
    if !matches!(self.current, Token::Infix(Operator::Pow)) {
        return base;
    }
    self.next_token();
    let exponent = self.factor();
    base.powf(exponent)
}
/// Parses a left-associative chain of multiplications, divisions and remainders. A zero
/// right-hand side of `/` or `%` is recorded as a division-by-zero error at the operator.
///
/// ```text
/// <term> = <factor> {("*" | "/" | "%") <factor>}
/// ```
fn term(&mut self) -> f64 {
    let mut acc = self.factor();
    loop {
        let op = match self.current {
            Token::Infix(op @ (Operator::Mul | Operator::Div | Operator::Rem)) => op,
            _ => break,
        };
        // The operator is the character just consumed.
        let op_pos = self.pos - 1;
        self.next_token();
        let rhs = self.factor();
        if rhs == 0.0 && matches!(op, Operator::Div | Operator::Rem) {
            // Division by zero (also for modulo)
            // Error position is the "/" or "%" sign for now
            self.set_error(ErrorKind::DivByZero, Some((op_pos, 1)));
        }
        acc = op.eval(acc, rhs);
    }
    acc
}
/// Parses a left-associative chain of additions and subtractions.
///
/// ```text
/// <expr> = <term> {("+" | "-") <term>}
/// ```
fn expr(&mut self) -> f64 {
    let mut acc = self.term();
    loop {
        let op = match self.current {
            Token::Infix(op @ (Operator::Add | Operator::Sub)) => op,
            _ => break,
        };
        self.next_token();
        let rhs = self.term();
        acc = op.eval(acc, rhs);
    }
    acc
}
}
/// Parses and evaluates `expression`, returning its value or the first recorded error.
pub fn te_interp(expression: &wstr) -> Result<f64, Error> {
    let mut state = State::new(expression);
    let value = state.eval();
    state.error().map(|()| value)
}

View file

@ -111,6 +111,90 @@ fn hexponent_error(e: hexponent::ParseError) -> Error {
}
}
/// Like [`wcstod()`], but allows underscore separators. Leading, trailing, and multiple underscores
/// are allowed, as are underscores next to decimal (`.`), exponent (`E`/`e`/`P`/`p`), and
/// hexadecimal (`X`/`x`) delimiters. This consumes trailing underscores -- `consumed` will include
/// the last underscore which is legal to include in a parse (according to the above rules).
/// Free-floating leading underscores (`"_ 3"`) are not allowed and will result in a no-parse.
/// Underscores are not allowed before or inside of `"infinity"` or `"nan"` input. Trailing
/// underscores after `"infinity"` or `"nan"` are not consumed.
pub fn wcstod_underscores<Chars>(s: Chars, consumed: &mut usize) -> Result<f64, Error>
where
    Chars: IntoCharIter,
{
    let is_sign = |c: char| "+-".contains(c);
    let is_inf_or_nan_char = |c: char| "iInN".contains(c);

    let mut chars = s.chars().peekable();

    // Skip leading whitespace, remembering how much of it there was.
    let mut leading_whitespace = 0;
    while matches!(chars.peek(), Some(c) if c.is_ascii_whitespace()) {
        leading_whitespace += 1;
        chars.next();
    }

    // We don't do any underscore-stripping for infinity/NaN. The input is one of those if it
    // starts with i/I/n/N, optionally preceded by a sign.
    let is_inf_nan = match chars.peek().copied() {
        Some(first) if is_inf_or_nan_char(first) => true,
        Some(first) if is_sign(first) => {
            // FIXME make this more efficient
            let mut lookahead = chars.clone();
            lookahead.next();
            matches!(lookahead.peek(), Some(&second) if is_inf_or_nan_char(second))
        }
        _ => false,
    };
    if is_inf_nan {
        let f = wcstod_inner(chars, '.', consumed)?;
        *consumed += leading_whitespace;
        return Ok(f);
    }

    // We build a string to pass to the system wcstod, pruned of underscores. We will take all
    // leading alphanumeric characters that can appear in a strtod numeric literal, dots (.), and
    // signs (+/-). In order to be more clever, for example to stop earlier in the case of strings
    // like "123xxxxx", we would need to do a full parse, because sometimes 'a' is a hex digit and
    // sometimes it is the end of the parse, sometimes a dot '.' is a decimal delimiter and
    // sometimes it is the end of the valid parse, as in "1_2.3_4.5_6", etc.
    let mut pruned = vec![];
    // We keep track of the positions *in the pruned string* where there used to be underscores. We
    // will pass the pruned version of the input string to the system wcstod, which in turn will
    // tell us how many characters it consumed. Then we will set our own endptr based on (1) the
    // number of characters consumed from the pruned string, and (2) how many underscores came
    // before the last consumed character. The alternative to doing it this way (for example, "only
    // deleting the correct underscores") would require actually parsing the input string, so that
    // we can know when to stop grabbing characters and dropping underscores, as in "1_2.3_4.5_6".
    let mut underscores = vec![];
    // If we wanted to future-proof against a strtod from the future that, say, allows octal
    // literals using 0o, etc., we could just use iswalnum, instead of iswxdigit and P/p/X/x checks.
    let numeric_prefix =
        chars.take_while(|&c| c.is_ascii_hexdigit() || "PpXx._".contains(c) || is_sign(c));
    for c in numeric_prefix {
        match c {
            '_' => underscores.push(pruned.len()),
            _ => pruned.push(c),
        }
    }

    let mut pruned_consumed = 0;
    let f = wcstod_inner(pruned.into_iter(), '.', &mut pruned_consumed)?;
    // Every underscore located at or before the end of the consumed text counts as consumed.
    let underscores_consumed = underscores
        .iter()
        .take_while(|&&n| n <= pruned_consumed)
        .count();
    *consumed = leading_whitespace + pruned_consumed + underscores_consumed;
    Ok(f)
}
#[cfg(test)]
mod test {
#![allow(overflowing_literals)]
@ -507,4 +591,56 @@ mod test {
assert_eq!(result, val);
assert_eq!(consumed, exp_consumed);
}
/// Checks the parsed value and the number of consumed characters (or the error) of
/// `wcstod_underscores` for a table of inputs.
#[test]
fn wcstod_underscores() {
    let test = |s: &str| {
        let mut consumed = 0;
        super::wcstod_underscores(s, &mut consumed).map(|f| (f, consumed))
    };

    let cases: &[(&str, Result<(f64, usize), Error>)] = &[
        ("123", Ok((123.0, 3))),
        ("123", Ok((123.0, 3))),
        ("1_2.3_4.5_6", Ok((12.34, 7))),
        ("1_2", Ok((12.0, 3))),
        ("1_._2", Ok((1.2, 5))),
        ("1__2", Ok((12.0, 4))),
        (" 1__2 3__4 ", Ok((12.0, 5))),
        ("1_2 3_4", Ok((12.0, 3))),
        (" 1", Ok((1.0, 2))),
        (" 1_", Ok((1.0, 3))),
        (" 1__", Ok((1.0, 4))),
        (" 1___", Ok((1.0, 5))),
        (" 1___ 2___", Ok((1.0, 5))),
        (" _1", Ok((1.0, 3))),
        ("1 ", Ok((1.0, 1))),
        ("infinity_", Ok((f64::INFINITY, 8))),
        (" -INFINITY", Ok((f64::NEG_INFINITY, 10))),
        ("_infinity", Err(Error::Empty)),
        /*
        {
            let (f, n) = test("nan(0)").unwrap();
            assert!(f.is_nan());
            assert_eq!(n, 6);
        }
        {
            let (f, n) = test("nan(0)_").unwrap();
            assert!(f.is_nan());
            assert_eq!(n, 6);
        }
        */
        ("_nan(0)", Err(Error::Empty)),
        // We don't strip the underscores in this commented-out test case, and the behavior is
        // implementation-defined, so we don't actually know how many characters will get consumed. On
        // macOS the strtod man page only says what happens with an alphanumeric string passed to nan(),
        // but the strtod consumes all of the characters even if there are underscores.
        // assert_eq!(test("nan(0_1_2)"), Ok((nan(0_1_2), 3)));
        (" _ 1", Err(Error::Empty)),
        ("0x_dead_beef", Ok((0xdeadbeef_u32 as f64, 12))),
        ("None", Err(Error::InvalidChar)),
        (" None", Err(Error::InvalidChar)),
        ("Also none", Err(Error::InvalidChar)),
        (" Also none", Err(Error::InvalidChar)),
    ];

    for (input, expected) in cases {
        assert_eq!(&test(*input), expected, "input: {input:?}");
    }
}
}

View file

@ -40,7 +40,6 @@
#include "builtins/functions.h"
#include "builtins/history.h"
#include "builtins/jobs.h"
#include "builtins/math.h"
#include "builtins/path.h"
#include "builtins/read.h"
#include "builtins/set.h"
@ -385,7 +384,7 @@ static constexpr builtin_data_t builtin_datas[] = {
{L"history", &builtin_history, N_(L"History of commands executed by user")},
{L"if", &builtin_generic, N_(L"Evaluate block if condition is true")},
{L"jobs", &builtin_jobs, N_(L"Print currently running jobs")},
{L"math", &builtin_math, N_(L"Evaluate math expressions")},
{L"math", &implemented_in_rust, N_(L"Evaluate math expressions")},
{L"not", &builtin_generic, N_(L"Negate exit status of job")},
{L"or", &builtin_generic, N_(L"Execute command if previous command failed")},
{L"path", &builtin_path, N_(L"Handle paths")},
@ -550,6 +549,9 @@ static maybe_t<RustBuiltin> try_get_rust_builtin(const wcstring &cmd) {
if (cmd == L"exit") {
return RustBuiltin::Exit;
}
if (cmd == L"math") {
return RustBuiltin::Math;
}
if (cmd == L"pwd") {
return RustBuiltin::Pwd;
}

View file

@ -120,6 +120,7 @@ enum class RustBuiltin : int32_t {
Echo,
Emit,
Exit,
Math,
Printf,
Pwd,
Random,

View file

@ -1,302 +0,0 @@
// Implementation of the math builtin.
#include "config.h" // IWYU pragma: keep
#include "math.h"
#include <cerrno>
#include <cmath>
#include <cwchar>
#include <limits>
#include <string>
#include "../builtin.h"
#include "../common.h"
#include "../fallback.h" // IWYU pragma: keep
#include "../io.h"
#include "../maybe.h"
#include "../tinyexpr.h"
#include "../wgetopt.h"
#include "../wutil.h" // IWYU pragma: keep
// The maximum number of points after the decimal that we'll print.
// This is the scale used when no --scale option is given.
static constexpr int kDefaultScale = 6;
// The end of the range such that every integer is representable as a double.
// i.e. this is the first value such that x + 1 == x (or == x + 2, depending on rounding mode).
// Computed as 2 raised to the number of mantissa digits of double; results whose magnitude
// reaches this value are rejected as "too large" by evaluate_expression().
static constexpr double kMaximumContiguousInteger =
double(1LLU << std::numeric_limits<double>::digits);
// Options collected from the `math` command line by parse_cmd_opts().
struct math_cmd_opts_t {
// Set by -h/--help.
bool print_help = false;
// True once -s/--scale has been seen, even if it repeats the default value.
bool have_scale = false;
// Number of digits printed after the decimal separator (0..15).
int scale = kDefaultScale;
// Output base: 10 by default; --base accepts only 8 and 16.
int base = 10;
};
// This command is atypical in using the "+" (REQUIRE_ORDER) option for flag parsing.
// This is needed because of the minus, `-`, operator in math expressions.
// parse_cmd_opts() handles a ':' return value as "option is missing its argument".
static const wchar_t *const short_options = L"+:hs:b:";
// Long spellings of the short options above; the empty entry terminates the table.
static const struct woption long_options[] = {{L"scale", required_argument, 's'},
{L"base", required_argument, 'b'},
{L"help", no_argument, 'h'},
{}};
/// Parse the leading option flags of a `math` invocation.
///
/// \param opts    receives the parsed options.
/// \param optind  receives the index in \p argv of the first expression argument.
/// \param argc    number of entries in \p argv.
/// \param argv    the full argument vector, including the command name.
/// \param parser  only forwarded to builtin_missing_argument().
/// \param streams error messages are written to streams.err.
/// \return STATUS_CMD_OK on success, STATUS_INVALID_ARGS for a bad or missing option value or
///         an invalid scale/base combination.
///
/// An unrecognised option is not an error: it is taken to be the start of the expression
/// (which may begin with a minus sign). The scale/base compatibility check is applied on that
/// path too, so `math -s3 -b16 -1` is rejected exactly like `math -s3 -b16 1`.
static int parse_cmd_opts(math_cmd_opts_t &opts, int *optind,  //!OCLINT(high ncss method)
                          int argc, const wchar_t **argv, parser_t &parser, io_streams_t &streams) {
    const wchar_t *cmd = L"math";
    int opt;
    wgetopter_t w;
    // Set when option parsing stops at something that looks like an option but is not one.
    bool hit_expression = false;
    while (!hit_expression &&
           (opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) {
        switch (opt) {
            case 's': {
                opts.have_scale = true;
                // "max" is the special value that tells us to pick the maximum scale.
                if (std::wcscmp(w.woptarg, L"max") == 0) {
                    opts.scale = 15;
                } else {
                    opts.scale = fish_wcstoi(w.woptarg);
                    if (errno || opts.scale < 0 || opts.scale > 15) {
                        streams.err.append_format(_(L"%ls: %ls: invalid scale value\n"), cmd,
                                                  w.woptarg);
                        return STATUS_INVALID_ARGS;
                    }
                }
                break;
            }
            case 'b': {
                if (std::wcscmp(w.woptarg, L"hex") == 0) {
                    opts.base = 16;
                } else if (std::wcscmp(w.woptarg, L"octal") == 0) {
                    opts.base = 8;
                } else {
                    opts.base = fish_wcstoi(w.woptarg);
                    if (errno || (opts.base != 8 && opts.base != 16)) {
                        streams.err.append_format(_(L"%ls: %ls: invalid base value\n"), cmd,
                                                  w.woptarg);
                        return STATUS_INVALID_ARGS;
                    }
                }
                break;
            }
            case 'h': {
                opts.print_help = true;
                break;
            }
            case ':': {
                builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]);
                return STATUS_INVALID_ARGS;
            }
            case '?': {
                // For most commands this is an error. We ignore it because a math expression
                // can begin with a minus sign. Stop parsing options here, but still fall
                // through to the validation below.
                hit_expression = true;
                break;
            }
            default: {
                DIE("unexpected retval from wgetopt_long");
            }
        }
    }
    if (opts.have_scale && opts.scale != 0 && opts.base != 10) {
        streams.err.append_format(BUILTIN_ERR_COMBO2, cmd,
                                  L"non-zero scale value only valid for base 10");
        return STATUS_INVALID_ARGS;
    }
    // When we stopped at a pseudo-option, that argument itself is the start of the expression.
    *optind = hit_expression ? w.woptind - 1 : w.woptind;
    return STATUS_CMD_OK;
}
// We read from stdin if we are the second or later process in a pipeline.
// Returns the streams' stdin_is_directly_redirected flag unchanged.
static bool math_args_from_stdin(const io_streams_t &streams) {
return streams.stdin_is_directly_redirected;
}
/// Read one line (without its trailing newline) from the stdin fd of \p streams.
/// The bytes are converted with str2wcstring() and stored in \p storage.
/// \return a pointer into \p storage, or nullptr on a read error or on EOF with no data.
static const wchar_t *math_get_arg_stdin(wcstring *storage, const io_streams_t &streams) {
    std::string line;
    bool finished = false;
    while (!finished) {
        char byte = '\0';
        const long nread = read_blocked(streams.stdin_fd, &byte, 1);
        if (nread < 0) {
            // Read error.
            wperror(L"read");
            return nullptr;
        }
        if (nread == 0) {
            // EOF: only an argument if something was read before it.
            if (line.empty()) return nullptr;
            finished = true;
        } else if (byte == '\n') {
            // End of line: the argument is complete.
            finished = true;
        } else {
            line += byte;
        }
    }
    *storage = str2wcstring(line);
    return storage->c_str();
}
/// Fetch the argument at index \p *argidx of \p argv and advance the index past it.
/// \return the argument, or nullptr (leaving the index untouched) when \p argv is null or the
///         null terminator of the vector has been reached.
static const wchar_t *math_get_arg_argv(int *argidx, const wchar_t **argv) {
    if (!argv) return nullptr;
    const wchar_t *current = argv[*argidx];
    if (!current) return nullptr;
    ++*argidx;
    return current;
}
/// Produce the next expression fragment, either from stdin (when stdin is directly redirected)
/// or from \p argv starting at \p *argidx. This mimics how builtin `string` does it.
/// \return the fragment, or nullptr when there are no more.
static const wchar_t *math_get_arg(int *argidx, const wchar_t **argv, wcstring *storage,
                                   const io_streams_t &streams) {
    if (!math_args_from_stdin(streams)) {
        return math_get_arg_argv(argidx, argv);
    }
    assert(streams.stdin_fd >= 0 &&
           "stdin should not be closed since it is directly redirected");
    return math_get_arg_stdin(storage, streams);
}
/// Map a tinyexpr error to a human-readable (translated where applicable) message.
/// An error whose position is 0 is reported as "NO ERROR".
static const wchar_t *math_describe_error(const te_error_t &error) {
    if (error.position == 0) return L"NO ERROR";

    // Fallback for any error type not listed below.
    const wchar_t *description = L"Unknown error";
    switch (error.type) {
        case TE_ERROR_NONE:
            DIE("Error has no position");
        case TE_ERROR_UNKNOWN_FUNCTION:
            description = _(L"Unknown function");
            break;
        case TE_ERROR_MISSING_CLOSING_PAREN:
            description = _(L"Missing closing parenthesis");
            break;
        case TE_ERROR_MISSING_OPENING_PAREN:
            description = _(L"Missing opening parenthesis");
            break;
        case TE_ERROR_TOO_FEW_ARGS:
            description = _(L"Too few arguments");
            break;
        case TE_ERROR_TOO_MANY_ARGS:
            description = _(L"Too many arguments");
            break;
        case TE_ERROR_MISSING_OPERATOR:
            description = _(L"Missing operator");
            break;
        case TE_ERROR_UNEXPECTED_TOKEN:
            description = _(L"Unexpected token");
            break;
        case TE_ERROR_LOGICAL_OPERATOR:
            description = _(L"Logical operations are not supported, use `test` instead");
            break;
        case TE_ERROR_DIV_BY_ZERO:
            description = _(L"Division by zero");
            break;
        case TE_ERROR_UNKNOWN:
            description = _(L"Expression is bogus");
            break;
        default:
            break;
    }
    return description;
}
/// Render \p v as text according to \p opts.
/// - base 16 / base 8: the value is truncated to an integer and printed with a "0x" / "0"
///   prefix; the sign goes before the prefix and octal zero is printed as plain "0".
/// - scale 0: the value is truncated (not rounded) to an integer.
/// - otherwise: printed with opts.scale decimals, then trailing zeros and a dangling decimal
///   separator are removed.
static wcstring format_double(double v, const math_cmd_opts_t &opts) {
    const bool is_hex = opts.base == 16;
    if (is_hex || opts.base == 8) {
        v = trunc(v);
        if (!is_hex && v == 0.0) return L"0";  // not 00
        const char *sign = v < 0.0 ? "-" : "";
        if (is_hex) {
            return format_string(L"%s0x%llx", sign, (long long)std::fabs(v));
        }
        return format_string(L"%s0%llo", sign, (long long)std::fabs(v));
    }

    // As a special-case, a scale of 0 means to truncate to an integer instead of rounding.
    if (opts.scale == 0) {
        return format_string(L"%.*f", opts.scale, trunc(v));
    }

    wcstring text = format_string(L"%.*f", opts.scale, v);
    // Any non-digit means the text carries a decimal separator. Strip the zeros at the end, and
    // then the separator too if nothing is left after it.
    const wchar_t *const digits = L"0123456789";
    const bool has_non_digit = text.find_first_not_of(digits) != wcstring::npos;
    if (has_non_digit) {
        while (text.back() == L'0') text.pop_back();
        if (!std::wcschr(digits, text.back())) text.pop_back();
    }
    // Nothing left means the value was just zero.
    if (text.empty()) text.push_back(L'0');
    return text;
}
/// Evaluate math expressions.
static int evaluate_expression(const wchar_t *cmd, const parser_t &parser, io_streams_t &streams,
const math_cmd_opts_t &opts, wcstring &expression) {
UNUSED(parser);
int retval = STATUS_CMD_OK;
te_error_t error;
double v = te_interp(expression.c_str(), &error);
if (error.position == 0) {
// Check some runtime errors after the fact.
// TODO: Really, this should be done in tinyexpr
// (e.g. infinite is the result of "x / 0"),
// but that's much more work.
const wchar_t *error_message = nullptr;
if (std::isinf(v)) {
error_message = L"Result is infinite";
} else if (std::isnan(v)) {
error_message = L"Result is not a number";
} else if (std::fabs(v) >= kMaximumContiguousInteger) {
error_message = L"Result magnitude is too large";
}
if (error_message) {
streams.err.append_format(L"%ls: Error: %ls\n", cmd, error_message);
streams.err.append_format(L"'%ls'\n", expression.c_str());
retval = STATUS_CMD_ERROR;
} else {
streams.out.append(format_double(v, opts) + L"\n");
}
} else {
streams.err.append_format(L"%ls: Error: %ls\n", cmd, math_describe_error(error));
streams.err.append_format(L"'%ls'\n", expression.c_str());
if (error.len >= 2) {
wcstring tildes(error.len - 2, L'~');
streams.err.append_format(L"%*ls%ls%ls%ls\n", error.position - 1, L" ", L"^",
tildes.c_str(), L"^");
} else {
streams.err.append_format(L"%*ls%ls\n", error.position - 1, L" ", L"^");
}
retval = STATUS_CMD_ERROR;
}
return retval;
}
/// Entry point of the `math` builtin: parse options, join the remaining arguments (or the
/// lines read from stdin) with single spaces into one expression, and evaluate it.
maybe_t<int> builtin_math(parser_t &parser, io_streams_t &streams, const wchar_t **argv) {
    const wchar_t *cmd = argv[0];
    const int argc = builtin_count_args(argv);
    math_cmd_opts_t opts;
    int optind;

    const int status = parse_cmd_opts(opts, &optind, argc, argv, parser, streams);
    if (status != STATUS_CMD_OK) return status;

    if (opts.print_help) {
        builtin_print_help(parser, streams, cmd);
        return STATUS_CMD_OK;
    }

    wcstring expression;
    wcstring storage;
    for (const wchar_t *piece = math_get_arg(&optind, argv, &storage, streams); piece != nullptr;
         piece = math_get_arg(&optind, argv, &storage, streams)) {
        if (!expression.empty()) expression.push_back(L' ');
        expression.append(piece);
    }

    // No expression at all is reported as a missing argument.
    if (expression.empty()) {
        streams.err.append_format(BUILTIN_ERR_MIN_ARG_COUNT1, cmd, 1, 0);
        return STATUS_CMD_ERROR;
    }
    return evaluate_expression(cmd, parser, streams, opts, expression);
}

View file

@ -1,11 +0,0 @@
// Prototypes for executing builtin_math function.
#ifndef FISH_BUILTIN_MATH_H
#define FISH_BUILTIN_MATH_H
#include "../maybe.h"
// Forward declarations; the full definitions are not needed for the prototype below.
class parser_t;
struct io_streams_t;
// The `math` builtin. argv[0] is the command name and argv is null-terminated.
// Returns the exit status of the command.
maybe_t<int> builtin_math(parser_t &parser, io_streams_t &streams, const wchar_t **argv);
#endif

View file

@ -2886,49 +2886,6 @@ static void test_wcstod() {
tod_test(L"nope", "nope");
}
/// Check how many characters fish_wcstod_underscores() consumes for a range of inputs.
static void test_fish_wcstod_underscores() {
    say(L"Testing fish_wcstod_underscores");

    struct underscore_case_t {
        const wchar_t *input;
        size_t consumed;
    };
    static const underscore_case_t cases[] = {
        {L"123", 3},
        {L"1_2.3_4.5_6", 7},
        {L"1_2", 3},
        {L"1_._2", 5},
        {L"1__2", 4},
        {L" 1__2 3__4 ", 5},
        {L"1_2 3_4", 3},
        {L" 1", 2},
        {L" 1_", 3},
        {L" 1__", 4},
        {L" 1___", 5},
        {L" 1___ 2___", 5},
        {L" _1", 3},
        {L"1 ", 1},
        {L"infinity_", 8},
        {L" -INFINITY", 10},
        {L"_infinity", 0},
        {L"nan(0)", 6},
        {L"nan(0)_", 6},
        {L"_nan(0)", 0},
        // We don't strip the underscores in this commented-out test case, and the behavior is
        // implementation-defined, so we don't actually know how many characters will get
        // consumed. On macOS the strtod man page only says what happens with an alphanumeric
        // string passed to nan(), but the strtod consumes all of the characters even if there
        // are underscores.
        // {L"nan(0_1_2)", 3},
        {L" _ 1", 0},
        {L"0x_dead_beef", 12},
        {L"None", 0},
        {L" None", 0},
        {L"Also none", 0},
        {L" Also none", 0},
    };

    for (const underscore_case_t &tc : cases) {
        wchar_t *endptr = nullptr;
        fish_wcstod_underscores(tc.input, &endptr);
        const size_t expected_num_consumed = tc.consumed;
        const size_t num_consumed = (size_t)(endptr - (wchar_t *)tc.input);
        do_test(expected_num_consumed == num_consumed);
    }
}
static void test_dup2s() {
using std::make_shared;
io_chain_t chain;
@ -6836,7 +6793,6 @@ static const test_t s_tests[]{
{TEST_GROUP("abbreviations"), test_abbreviations},
{TEST_GROUP("builtins/test"), test_test},
{TEST_GROUP("wcstod"), test_wcstod},
{TEST_GROUP("fish_wcstod_underscores"), test_fish_wcstod_underscores},
{TEST_GROUP("dup2s"), test_dup2s},
{TEST_GROUP("dup2s"), test_dup2s_fd_for_target_fd},
{TEST_GROUP("path"), test_path},

View file

@ -1,578 +0,0 @@
/*
* TINYEXPR - Tiny recursive descent parser and evaluation engine in C
*
* Copyright (c) 2015, 2016 Lewis Van Winkle
*
* http://CodePlea.com
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgement in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
// This version has been altered and ported to C++ for inclusion in fish.
#include "config.h"
#include "tinyexpr.h"
#include <ctype.h>
#include <limits.h>
#include <algorithm>
#include <cmath>
#include <cwchar>
#include <cwctype>
#include <iterator>
#include <limits>
#include <vector>
#include "common.h"
#include "fallback.h" // IWYU pragma: keep
#include "wutil.h"
// A value the tokenizer can hand to the parser: either a numeric constant or a pointer to a
// function taking zero, one, two, or a variable number of double arguments.
struct te_fun_t {
// Variadic function: receives all arguments as a vector.
using fn_va = double (*)(const std::vector<double> &);
using fn_2 = double (*)(double, double);
using fn_1 = double (*)(double);
using fn_0 = double (*)();
// Implicit constructors; each records the kind and arity matching the stored member.
// A variadic function is recorded with arity -1.
constexpr te_fun_t(double val) : type_{CONSTANT}, arity_{0}, value{val} {}
constexpr te_fun_t(fn_0 fn) : type_{FN_FIXED}, arity_{0}, fun0{fn} {}
constexpr te_fun_t(fn_1 fn) : type_{FN_FIXED}, arity_{1}, fun1{fn} {}
constexpr te_fun_t(fn_2 fn) : type_{FN_FIXED}, arity_{2}, fun2{fn} {}
constexpr te_fun_t(fn_va fn) : type_{FN_VARIADIC}, arity_{-1}, fun_va{fn} {}
// True only if this holds exactly the given two-argument function.
bool operator==(fn_2 fn) const { return arity_ == 2 && fun2 == fn; }
__warn_unused int arity() const { return arity_; }
// Evaluate a constant or a zero-argument function.
double operator()() const {
assert(arity_ == 0);
return type_ == CONSTANT ? value : fun0();
}
// Evaluate a two-argument function.
double operator()(double a, double b) const {
assert(arity_ == 2);
return fun2(a, b);
}
// Evaluate with an argument list. Variadic functions get the list as-is; fixed-arity
// functions yield NAN when the argument count does not match their arity.
double operator()(const std::vector<double> &args) const {
if (type_ == FN_VARIADIC) return fun_va(args);
if (arity_ != static_cast<int>(args.size())) return NAN;
switch (arity_) {
case 0:
return type_ == CONSTANT ? value : fun0();
case 1:
return fun1(args[0]);
case 2:
return fun2(args[0], args[1]);
}
return NAN;
}
private:
// Which member of the union below is active (together with arity_ for FN_FIXED).
enum {
CONSTANT,
FN_FIXED,
FN_VARIADIC,
} type_;
int arity_;
union {
double value;
fn_0 fun0;
fn_1 fun1;
fn_2 fun2;
fn_va fun_va;
};
};
// The kind of token most recently produced by state::next_token().
enum te_state_type_t {
// No token yet; next_token() keeps scanning while the type is still this (e.g. whitespace).
TOK_NULL,
// An error was detected; details are kept in the parser state.
TOK_ERROR,
// End of the input string.
TOK_END,
// Argument separator ','.
TOK_SEP,
// Opening parenthesis '('.
TOK_OPEN,
// Closing parenthesis ')'.
TOK_CLOSE,
// Numeric literal.
TOK_NUMBER,
// Name found in the builtin table (function or constant).
TOK_FUNCTION,
// Infix operator: + - * x / ^ %.
TOK_INFIX
};
// Tokenizer and recursive-descent evaluator over a single expression string.
struct state {
// Remember the start of the expression and read the first token right away.
explicit state(const wchar_t *expr) : start_{expr}, next_{expr} { next_token(); }
// Evaluate the whole expression; call error() afterwards to learn whether it succeeded.
double eval() { return expr(); }
// Describe the outcome of evaluation. If the tokenizer reached the end of input this is
// {TE_ERROR_NONE, 0, 0}; otherwise the position is a 1-based offset into the expression.
__warn_unused te_error_t error() const {
if (type_ == TOK_END) return {TE_ERROR_NONE, 0, 0};
// If we have an error position set, use that,
// otherwise the current position.
const wchar_t *tok = errpos_ ? errpos_ : next_;
te_error_t err{error_, static_cast<int>(tok - start_) + 1, errlen_};
if (error_ == TE_ERROR_NONE) {
// If we're not at the end but there's no error, then that means we have a
// superfluous token that we have no idea what to do with.
err.type = TE_ERROR_TOO_MANY_ARGS;
}
return err;
}
private:
// Kind of the current token.
te_state_type_t type_{TOK_NULL};
// Error recorded so far, if any.
te_error_type_t error_{TE_ERROR_NONE};
// Start of the expression; used to turn pointers into positions.
const wchar_t *start_;
// First character not yet consumed by the tokenizer.
const wchar_t *next_;
// Explicit error location and length; when errpos_ is null, error() uses next_ instead.
const wchar_t *errpos_{nullptr};
int errlen_{0};
// Payload of the current token: the number, function, or operator.
te_fun_t current_{NAN};
void next_token();
double expr();
double power();
double base();
double factor();
double term();
};
/// Factorial of \p a, with \p a truncated to an unsigned integer first.
/// \return NAN for negative or NaN input, INFINITY when \p a exceeds UINT_MAX or the product
///         does not fit in an unsigned long, and the exact factorial otherwise.
static double fac(double a) { /* simplest version of fac */
    // NaN compares false against everything, so it would slip past both range checks below and
    // reach the float-to-integer conversion, which is undefined for NaN.
    if (std::isnan(a)) return NAN;
    if (a < 0.0) return NAN;
    if (a > UINT_MAX) return INFINITY;
    auto ua = static_cast<unsigned int>(a);
    unsigned long int result = 1, i;
    for (i = 1; i <= ua; i++) {
        // Stop before the multiplication would overflow.
        if (i > ULONG_MAX / result) return INFINITY;
        result *= i;
    }
    return static_cast<double>(result);
}
/// Binomial coefficient "n choose r", with both arguments truncated to unsigned integers.
/// \return NAN when either argument is negative or n < r, INFINITY when an argument is NaN,
///         exceeds UINT_MAX, or the intermediate product overflows, and the coefficient
///         otherwise.
static double ncr(double n, double r) {
    // Doing this for NAN takes ages - just return the result right away.
    // A NaN r must be caught here as well: every comparison below is false for NaN, so it
    // would otherwise reach the float-to-integer conversion, which is undefined for NaN.
    if (std::isnan(n) || std::isnan(r)) return INFINITY;
    if (n < 0.0 || r < 0.0 || n < r) return NAN;
    if (n > UINT_MAX || r > UINT_MAX) return INFINITY;
    unsigned long int un = static_cast<unsigned int>(n), ur = static_cast<unsigned int>(r), i;
    unsigned long int result = 1;
    // C(n, r) == C(n, n - r); iterate over the smaller of the two.
    if (ur > un / 2) ur = un - ur;
    for (i = 1; i <= ur; i++) {
        // Stop before the multiplication would overflow.
        if (result > ULONG_MAX / (un - ur + i)) return INFINITY;
        result *= un - ur + i;
        result /= i;
    }
    return static_cast<double>(result);
}
/// Number of permutations of r items out of n: the number of combinations times r!.
static double npr(double n, double r) {
    const double combinations = ncr(n, r);
    const double orderings = fac(r);
    return combinations * orderings;
}
// Bitwise AND of the two operands after converting each to long long; the result is
// converted back to double.
static constexpr double bit_and(double a, double b) {
return static_cast<double>(static_cast<long long>(a) & static_cast<long long>(b));
}
// Bitwise OR of the two operands after converting each to long long; the result is
// converted back to double.
static constexpr double bit_or(double a, double b) {
return static_cast<double>(static_cast<long long>(a) | static_cast<long long>(b));
}
// Bitwise XOR of the two operands after converting each to long long; the result is
// converted back to double.
static constexpr double bit_xor(double a, double b) {
return static_cast<double>(static_cast<long long>(a) ^ static_cast<long long>(b));
}
/// Larger of two values. A NaN operand is returned as-is (the first one if both are NaN),
/// and +0 counts as larger than -0.
static double max(double a, double b) {
    if (std::isnan(a)) return a;
    if (std::isnan(b)) return b;
    if (a > b) return a;
    if (a < b) return b;
    // Equal values: only the sign of zero can differ, so prefer the non-negative one.
    return std::signbit(a) ? b : a;
}
/// Smaller of two values. A NaN operand is returned as-is (the first one if both are NaN),
/// and -0 counts as smaller than +0.
static double min(double a, double b) {
    if (std::isnan(a)) return a;
    if (std::isnan(b)) return b;
    if (a < b) return a;
    if (a > b) return b;
    // Equal values: only the sign of zero can differ, so prefer the negative one.
    return std::signbit(a) ? a : b;
}
/// Largest of \p args according to max(); negative infinity for an empty list.
static double maximum(const std::vector<double> &args) {
    double best = -std::numeric_limits<double>::infinity();
    for (auto it = args.begin(); it != args.end(); ++it) {
        best = max(best, *it);
    }
    return best;
}
/// Smallest of \p args according to min(); positive infinity for an empty list.
static double minimum(const std::vector<double> &args) {
    double best = std::numeric_limits<double>::infinity();
    for (auto it = args.begin(); it != args.end(); ++it) {
        best = min(best, *it);
    }
    return best;
}
// One entry of the builtin table: a name usable in expressions and what it evaluates to.
struct te_builtin {
// Name as written in an expression.
const wchar_t *name;
// The constant or function bound to that name.
te_fun_t fn;
};
// Table of every constant and function name recognised in expressions.
// It is searched with a binary search, hence the ordering requirement below.
static constexpr te_builtin functions[] = {
/* must be in alphabetical order */
// clang-format off
{L"abs", std::fabs},
{L"acos", std::acos},
{L"asin", std::asin},
{L"atan", std::atan},
{L"atan2", std::atan2},
{L"bitand", bit_and},
{L"bitor", bit_or},
{L"bitxor", bit_xor},
{L"ceil", std::ceil},
{L"cos", std::cos},
{L"cosh", std::cosh},
{L"e", M_E},
{L"exp", std::exp},
{L"fac", fac},
{L"floor", std::floor},
// Note: "ln" is the natural logarithm, while "log" is base 10 (same as "log10").
{L"ln", std::log},
{L"log", std::log10},
{L"log10", std::log10},
{L"log2", std::log2},
{L"max", maximum},
{L"min", minimum},
{L"ncr", ncr},
{L"npr", npr},
{L"pi", M_PI},
{L"pow", std::pow},
{L"round", std::round},
{L"sin", std::sin},
{L"sinh", std::sinh},
{L"sqrt", std::sqrt},
{L"tan", std::tan},
{L"tanh", std::tanh},
{L"tau", 2 * M_PI},
// clang-format on
};
ASSERT_SORTED_BY_NAME(functions);
/// Look up the builtin whose name is exactly the first \p len characters of \p name.
/// \p name need not be terminated after \p len characters (it usually continues with the rest
/// of the expression).
/// \return the matching table entry, or nullptr if there is none.
static const te_builtin *find_builtin(const wchar_t *name, int len) {
    const te_builtin *const first = std::begin(functions);
    const te_builtin *const last = std::end(functions);

    // The length is important because that's where the parens start.
    auto sorts_before = [len](const te_builtin &entry, const wchar_t *wanted) {
        return std::wcsncmp(entry.name, wanted, len) < 0;
    };
    const te_builtin *candidate = std::lower_bound(first, last, name, sorts_before);
    if (candidate == last) return nullptr;

    // lower_bound may have stopped at the first "larger" element, or at a longer name that
    // merely shares the prefix, so confirm an exact match.
    const bool exact =
        std::wcsncmp(candidate->name, name, len) == 0 && candidate->name[len] == 0;
    return exact ? candidate : nullptr;
}
// Plain arithmetic helpers. They are real functions (rather than inline operators) so that
// the tokenizer can store them in a te_fun_t and the parser can compare against them.
static constexpr double add(double a, double b) { return a + b; }
static constexpr double sub(double a, double b) { return a - b; }
static constexpr double mul(double a, double b) { return a * b; }
// Division with explicit handling of a zero divisor, so the result does not depend on how
// the platform treats floating-point division by zero.
static constexpr double divide(double a, double b) {
// If b isn't zero, divide.
// If a isn't zero, return signed INFINITY.
// Else, return NAN.
// The sign of the infinity is the product of the signs of a and b.
return b ? a / b : a ? copysign(1, a) * copysign(1, b) * INFINITY : NAN;
}
/// Advance to the next token of the expression.
///
/// On return type_ is never TOK_NULL: it is TOK_END at the end of input, TOK_ERROR (with
/// error_ set) for something unrecognised, or the kind of the token just read, in which case
/// current_ holds its number / function / operator where applicable. Whitespace is skipped.
void state::next_token() {
    type_ = TOK_NULL;
    do {
        if (!*next_) {
            type_ = TOK_END;
            return;
        }
        /* Try reading a number. */
        if ((next_[0] >= '0' && next_[0] <= '9') || next_[0] == '.') {
            current_ = fish_wcstod_underscores(next_, const_cast<wchar_t **>(&next_));
            type_ = TOK_NUMBER;
        } else {
            /* Look for a function call. */
            // But not when it's an "x" followed by whitespace
            // - that's the alternative multiplication operator.
            // The wide-character classifier is required here: isspace() has undefined behavior
            // for values that do not fit in an unsigned char, which any non-Latin-1 wchar_t is.
            if (next_[0] >= 'a' && next_[0] <= 'z' &&
                !(next_[0] == 'x' && std::iswspace(next_[1]))) {
                const wchar_t *start = next_;
                // A name is a run of lowercase letters, digits and underscores.
                while ((next_[0] >= 'a' && next_[0] <= 'z') ||
                       (next_[0] >= '0' && next_[0] <= '9') || (next_[0] == '_'))
                    next_++;
                const te_builtin *var = find_builtin(start, next_ - start);
                if (var) {
                    type_ = TOK_FUNCTION;
                    current_ = var->fn;
                } else if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) {
                    // Our error is more specific, so it takes precedence.
                    type_ = TOK_ERROR;
                    error_ = TE_ERROR_UNKNOWN_FUNCTION;
                    errpos_ = start + 1;
                    errlen_ = next_ - start;
                }
            } else {
                /* Look for an operator or special character. */
                switch (next_++[0]) {
                    case '+':
                        type_ = TOK_INFIX;
                        current_ = add;
                        break;
                    case '-':
                        type_ = TOK_INFIX;
                        current_ = sub;
                        break;
                    case 'x':
                    case '*':
                        // We've already checked for whitespace above.
                        type_ = TOK_INFIX;
                        current_ = mul;
                        break;
                    case '/':
                        type_ = TOK_INFIX;
                        current_ = divide;
                        break;
                    case '^':
                        type_ = TOK_INFIX;
                        current_ = pow;
                        break;
                    case '%':
                        type_ = TOK_INFIX;
                        current_ = fmod;
                        break;
                    case '(':
                        type_ = TOK_OPEN;
                        break;
                    case ')':
                        type_ = TOK_CLOSE;
                        break;
                    case ',':
                        type_ = TOK_SEP;
                        break;
                    case ' ':
                    case '\t':
                    case '\n':
                    case '\r':
                        // Whitespace: leave type_ as TOK_NULL so the loop keeps scanning.
                        break;
                    case '=':
                    case '>':
                    case '<':
                    case '&':
                    case '|':
                    case '!':
                        type_ = TOK_ERROR;
                        error_ = TE_ERROR_LOGICAL_OPERATOR;
                        break;
                    default:
                        type_ = TOK_ERROR;
                        error_ = TE_ERROR_MISSING_OPERATOR;
                        break;
                }
            }
        }
    } while (type_ == TOK_NULL);
}
// Parse and evaluate the innermost grammar level: a number, a constant, a function call, or a
// parenthesised expression. Returns the value on success. On failure it records the error in
// type_/error_ (and sometimes errpos_/errlen_) and returns NAN, except in the number case,
// which returns the number even when it flags a missing operator after it.
double state::base() {
/* <base> = <constant> | <function-0> {"(" ")"} | <function-1> <power> |
* <function-X> "(" <expr> {"," <expr>} ")" | "(" <list> ")" */
// Position just after the current token; used for error reporting in the number case.
auto next = next_;
switch (type_) {
case TOK_NUMBER: {
auto val = current_();
next_token();
if (type_ == TOK_NUMBER || type_ == TOK_FUNCTION) {
// Two numbers after each other:
// math '5 2'
// math '3 pi'
// (of course 3 pi could also be interpreted as 3 x pi)
type_ = TOK_ERROR;
error_ = TE_ERROR_MISSING_OPERATOR;
// The error should be given *between*
// the last two tokens.
errpos_ = next + 1;
// Go to the end of whitespace and then one more.
// (A following token is known to exist here, so the scan stops before the terminator.)
while (wcschr(L" \t\n\r", next[0])) {
next++;
}
next++;
errlen_ = next - errpos_;
}
return val;
}
case TOK_FUNCTION: {
// Copy the function: next_token() overwrites current_.
auto fn = current_;
int arity = fn.arity();
next_token();
const bool have_open = type_ == TOK_OPEN;
if (have_open) {
// If we *have* an opening parenthesis,
// we need to consume it and
// expect a closing one.
next_token();
}
if (arity == 0) {
// Constants and zero-argument functions: "()" is optional, but if opened it must be
// closed immediately.
if (have_open) {
if (type_ == TOK_CLOSE) {
next_token();
} else if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) {
type_ = TOK_ERROR;
error_ = TE_ERROR_MISSING_CLOSING_PAREN;
break;
}
}
return fn();
}
std::vector<double> parameters;
int i;
// Start of the first surplus argument, if there is one.
const wchar_t *first_err = nullptr;
// Collect comma-separated arguments; i ends up as the index of the last one parsed.
for (i = 0;; i++) {
if (i == arity) first_err = next_;
parameters.push_back(expr());
if (type_ != TOK_SEP) {
break;
}
next_token();
}
// Variadic functions (arity < 0) accept any count; otherwise the count must match.
if (arity < 0 || i == arity - 1) {
if (!have_open) {
return fn(parameters);
}
if (type_ == TOK_CLOSE) {
// We have an opening and a closing paren, consume the closing one and done.
next_token();
return fn(parameters);
}
if (type_ != TOK_ERROR) {
// If we had the right number of arguments, we're missing a closing paren.
error_ = TE_ERROR_MISSING_CLOSING_PAREN;
type_ = TOK_ERROR;
}
}
if (type_ != TOK_ERROR || error_ == TE_ERROR_UNEXPECTED_TOKEN) {
// Otherwise we complain about the number of arguments *first*,
// a closing parenthesis should be more obvious.
//
// Vararg functions need at least one argument.
error_ = (i < arity || (arity == -1 && i == 0)) ? TE_ERROR_TOO_FEW_ARGS
: TE_ERROR_TOO_MANY_ARGS;
type_ = TOK_ERROR;
if (first_err) {
errpos_ = first_err;
errlen_ = next_ - first_err;
// TODO: Rationalize where we put the cursor exactly.
// If we have a closing paren it's on it, if we don't it's before the number.
if (type_ != TOK_CLOSE) errlen_++;
}
}
break;
}
case TOK_OPEN: {
next_token();
auto ret = expr();
if (type_ == TOK_CLOSE) {
next_token();
return ret;
}
// No closing parenthesis: decide which error describes the situation best.
if (type_ != TOK_ERROR && type_ != TOK_END && error_ == TE_ERROR_NONE) {
type_ = TOK_ERROR;
error_ = TE_ERROR_TOO_MANY_ARGS;
} else if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) {
type_ = TOK_ERROR;
error_ = TE_ERROR_MISSING_CLOSING_PAREN;
}
break;
}
case TOK_END:
// The expression ended before we expected it.
// e.g. `2 - `.
// This means we have too few things.
// Instead of introducing another error, just call it
// "too few args".
type_ = TOK_ERROR;
error_ = TE_ERROR_TOO_FEW_ARGS;
break;
default:
// Any other token cannot start a <base>; keep a more specific error if one is set.
if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) {
type_ = TOK_ERROR;
error_ = TE_ERROR_UNEXPECTED_TOKEN;
}
break;
}
return NAN;
}
/// Parse any run of unary "+" / "-" signs followed by a <base>, and apply the combined sign.
double state::power() {
    /* <power> = {("-" | "+")} <base> */
    int sign = 1;
    for (; type_ == TOK_INFIX; next_token()) {
        if (current_ == sub) {
            // Each minus flips the sign.
            sign = -sign;
        } else if (!(current_ == add)) {
            // Any other operator is not a sign; leave it for the caller.
            break;
        }
    }
    return sign * base();
}
/// Parse exponentiation. The right-hand side recurses into factor(), which makes "^"
/// right-associative.
double state::factor() {
    /* <factor> = <power> {"^" <power>} */
    const double lhs = power();
    const bool is_exponent = type_ == TOK_INFIX && current_ == pow;
    if (!is_exponent) return lhs;
    next_token();
    return pow(lhs, factor());
}
double state::term() {
/* <term> = <factor> {("*" | "/" | "%") <factor>} */
auto ret = factor();
while (type_ == TOK_INFIX && (current_ == mul || current_ == divide || current_ == fmod)) {
auto fn = current_;
auto tok = next_;
next_token();
auto ret2 = factor();
if (ret2 == 0 && (fn == divide || fn == fmod)) {
// Division by zero (also for modulo)
type_ = TOK_ERROR;
error_ = TE_ERROR_DIV_BY_ZERO;
// Error position is the "/" or "%" sign for now
errpos_ = tok;
errlen_ = 1;
}
ret = fn(ret, ret2);
}
return ret;
}
double state::expr() {
/* <expr> = <term> {("+" | "-") <term>} */
auto ret = term();
while (type_ == TOK_INFIX && (current_ == add || current_ == sub)) {
auto fn = current_;
next_token();
ret = fn(ret, term());
}
return ret;
}
double te_interp(const wchar_t *expression, te_error_t *error) {
state s{expression};
double ret = s.eval();
if (error) *error = s.error();
return ret;
}

View file

@ -1,54 +0,0 @@
/*
* TINYEXPR - Tiny recursive descent parser and evaluation engine in C
*
* Copyright (c) 2015, 2016 Lewis Van Winkle
*
* http://CodePlea.com
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgement in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
// This version was altered and ported to C++ for inclusion in fish.
#ifndef TINYEXPR_H
#define TINYEXPR_H
/* Kinds of error the expression parser can report. */
typedef enum {
/* No error recorded. */
TE_ERROR_NONE = 0,
/* A name that is not in the builtin table. */
TE_ERROR_UNKNOWN_FUNCTION = 1,
TE_ERROR_MISSING_CLOSING_PAREN = 2,
TE_ERROR_MISSING_OPENING_PAREN = 3,
/* Also used when the expression ends early, e.g. `2 - `. */
TE_ERROR_TOO_FEW_ARGS = 4,
/* Also used for a superfluous trailing token. */
TE_ERROR_TOO_MANY_ARGS = 5,
/* Two values in a row (e.g. `5 2`), or an unrecognised character. */
TE_ERROR_MISSING_OPERATOR = 6,
TE_ERROR_UNEXPECTED_TOKEN = 7,
/* One of = > < & | ! was used. */
TE_ERROR_LOGICAL_OPERATOR = 8,
/* Zero right-hand side of `/` or `%`. */
TE_ERROR_DIV_BY_ZERO = 9,
TE_ERROR_UNKNOWN = 10
} te_error_type_t;
/* Outcome of te_interp(). */
typedef struct te_error_t {
/* What went wrong. */
te_error_type_t type;
/* 1-based position of the error in the expression; 0 means there was no error. */
int position;
/* Length of the offending span, used to draw the marker under the expression. */
int len;
} te_error_t;
/* Parses the input expression and evaluates it. */
/* If error is non-null it receives the outcome; error->position == 0 means success. */
/* When an error is reported the returned value should not be relied upon. */
double te_interp(const wchar_t *expression, te_error_t *error);
#endif /* TINYEXPR_H */

View file

@ -789,68 +789,6 @@ double fish_wcstod(const wcstring &str, wchar_t **endptr) {
return fish_wcstod(str.c_str(), endptr, str.size());
}
/// Like wcstod(), but allows underscore separators. Leading, trailing, and multiple underscores
/// are allowed, as are underscores next to decimal (.), exponent (E/e/P/p), and hexadecimal
/// (X/x) delimiters. Trailing underscores are consumed: endptr will point past the last
/// underscore which is legal to include in a parse (according to the above rules).
/// Free-floating leading underscores ("_ 3") are not allowed and will result in a no-parse.
/// Underscores are not allowed before or inside of "infinity" or "nan" input, and trailing
/// underscores after "infinity" or "nan" are not consumed.
double fish_wcstod_underscores(const wchar_t *str, wchar_t **endptr) {
    const wchar_t *const input = str;
    const wchar_t *cursor = str;
    while (iswspace(*cursor)) cursor++;  // Skip leading whitespace.
    const size_t leading_whitespace = size_t(cursor - input);

    auto is_sign = [](wchar_t c) { return c == L'+' || c == L'-'; };
    auto is_inf_or_nan_char = [](wchar_t c) {
        return c == L'i' || c == L'I' || c == L'n' || c == L'N';
    };

    // We don't do any underscore-stripping for infinity/NaN, with or without a leading sign.
    const wchar_t first_after_sign = is_sign(*cursor) ? cursor[1] : cursor[0];
    if (is_inf_or_nan_char(first_after_sign)) {
        return fish_wcstod(input, endptr);
    }

    // Build a copy of the candidate numeric text with the underscores removed, to hand to the
    // underlying wcstod. We take every leading character that can appear in a strtod numeric
    // literal: hex digits, P/p/X/x, signs and dots. Stopping any earlier (say for "123xxxxx")
    // would need a full parse, because whether 'a' is a hex digit or '.' is a decimal delimiter
    // depends on context, as in "1_2.3_4.5_6".
    //
    // For each removed underscore we record the position *in the pruned string* where it used
    // to be. The wcstod tells us how many pruned characters it consumed; adding the number of
    // underscores that sat at or before that point gives the consumed length in the original.
    //
    // If we wanted to future-proof against a strtod from the future that, say, allows octal
    // literals using 0o, etc., we could just use iswalnum, instead of iswxdigit and P/p/X/x
    // checks.
    auto may_be_numeric = [&](wchar_t c) {
        return iswxdigit(c) || c == L'P' || c == L'p' || c == L'X' || c == L'x' || is_sign(c) ||
               c == L'.' || c == L'_';
    };
    wcstring pruned;
    std::vector<size_t> underscore_positions;
    for (; may_be_numeric(*cursor); cursor++) {
        if (*cursor == L'_') {
            underscore_positions.push_back(pruned.length());
        } else {
            pruned.push_back(*cursor);
        }
    }

    const wchar_t *pruned_begin = pruned.c_str();
    const wchar_t *pruned_end = nullptr;
    const double result = fish_wcstod(pruned_begin, (wchar_t **)(&pruned_end));

    if (endptr) {
        if (pruned_end == pruned_begin) {
            // Nothing was parsed: report the very start of the input.
            *endptr = (wchar_t *)input;
        } else {
            const size_t pruned_consumed = size_t(pruned_end - pruned_begin);
            const size_t underscores_consumed =
                std::upper_bound(underscore_positions.begin(), underscore_positions.end(),
                                 pruned_consumed) -
                underscore_positions.begin();
            *endptr =
                (wchar_t *)(input + leading_whitespace + pruned_consumed + underscores_consumed);
        }
    }
    return result;
}
file_id_t file_id_t::from_stat(const struct stat &buf) {
file_id_t result = {};
result.device = buf.st_dev;

View file

@ -144,7 +144,6 @@ unsigned long long fish_wcstoull(const wchar_t *str, const wchar_t **endptr = nu
double fish_wcstod(const wchar_t *str, wchar_t **endptr, size_t len);
double fish_wcstod(const wchar_t *str, wchar_t **endptr);
double fish_wcstod(const wcstring &str, wchar_t **endptr);
double fish_wcstod_underscores(const wchar_t *str, wchar_t **endptr);
/// Class for representing a file's inode. We use this to detect and avoid symlink loops, among
/// other things. While an inode / dev pair is sufficient to distinguish co-existing files, Linux