/* * TINYEXPR - Tiny recursive descent parser and evaluation engine in C * * Copyright (c) 2015, 2016 Lewis Van Winkle * * http://CodePlea.com * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages * arising from the use of this software. * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: * * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software * in a product, an acknowledgement in the product documentation would be * appreciated but is not required. * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. * 3. This notice may not be removed or altered from any source distribution. */ // This version has been altered and ported to C++ for inclusion in fish. #include "tinyexpr.h" #include #include #include #include #include #include #include #include #include "common.h" #include "fallback.h" // IWYU pragma: keep #include "wutil.h" struct te_fun_t { using fn_va = double (*)(const std::vector &); using fn_2 = double (*)(double, double); using fn_1 = double (*)(double); using fn_0 = double (*)(); constexpr te_fun_t(double val) : type_{CONSTANT}, arity_{0}, value{val} {} constexpr te_fun_t(fn_0 fn) : type_{FN_FIXED}, arity_{0}, fun0{fn} {} constexpr te_fun_t(fn_1 fn) : type_{FN_FIXED}, arity_{1}, fun1{fn} {} constexpr te_fun_t(fn_2 fn) : type_{FN_FIXED}, arity_{2}, fun2{fn} {} constexpr te_fun_t(fn_va fn) : type_{FN_VARIADIC}, arity_{-1}, fun_va{fn} {} bool operator==(fn_2 fn) const { return arity_ == 2 && fun2 == fn; } [[nodiscard]] int arity() const { return arity_; } double operator()() const { assert(arity_ == 0); return type_ == CONSTANT ? value : fun0(); } double operator()(double a, double b) const { assert(arity_ == 2); return fun2(a, b); } double operator()(const std::vector &args) const { if (type_ == FN_VARIADIC) return fun_va(args); if (arity_ != static_cast(args.size())) return NAN; switch (arity_) { case 0: return type_ == CONSTANT ? value : fun0(); case 1: return fun1(args[0]); case 2: return fun2(args[0], args[1]); } return NAN; } private: enum { CONSTANT, FN_FIXED, FN_VARIADIC, } type_; int arity_; union { double value; fn_0 fun0; fn_1 fun1; fn_2 fun2; fn_va fun_va; }; }; enum te_state_type_t { TOK_NULL, TOK_ERROR, TOK_END, TOK_SEP, TOK_OPEN, TOK_CLOSE, TOK_NUMBER, TOK_FUNCTION, TOK_INFIX }; struct state { explicit state(const wchar_t *expr) : start_{expr}, next_{expr} { next_token(); } double eval() { return expr(); } [[nodiscard]] te_error_t error() const { if (type_ == TOK_END) return {TE_ERROR_NONE, 0, 0}; // If we have an error position set, use that, // otherwise the current position. const wchar_t *tok = errpos_ ? errpos_ : next_; te_error_t err{error_, static_cast(tok - start_) + 1, errlen_}; if (error_ == TE_ERROR_NONE) { // If we're not at the end but there's no error, then that means we have a // superfluous token that we have no idea what to do with. err.type = TE_ERROR_TOO_MANY_ARGS; } return err; } private: te_state_type_t type_{TOK_NULL}; te_error_type_t error_{TE_ERROR_NONE}; const wchar_t *start_; const wchar_t *next_; const wchar_t *errpos_{nullptr}; int errlen_{0}; te_fun_t current_{NAN}; void next_token(); double expr(); double power(); double base(); double factor(); double term(); }; static double fac(double a) { /* simplest version of fac */ if (a < 0.0) return NAN; if (a > UINT_MAX) return INFINITY; auto ua = static_cast(a); unsigned long int result = 1, i; for (i = 1; i <= ua; i++) { if (i > ULONG_MAX / result) return INFINITY; result *= i; } return static_cast(result); } static double ncr(double n, double r) { // Doing this for NAN takes ages - just return the result right away. if (std::isnan(n)) return INFINITY; if (n < 0.0 || r < 0.0 || n < r) return NAN; if (n > UINT_MAX || r > UINT_MAX) return INFINITY; unsigned long int un = static_cast(n), ur = static_cast(r), i; unsigned long int result = 1; if (ur > un / 2) ur = un - ur; for (i = 1; i <= ur; i++) { if (result > ULONG_MAX / (un - ur + i)) return INFINITY; result *= un - ur + i; result /= i; } return result; } static double npr(double n, double r) { return ncr(n, r) * fac(r); } static constexpr double bit_and(double a, double b) { return static_cast(static_cast(a) & static_cast(b)); } static constexpr double bit_or(double a, double b) { return static_cast(static_cast(a) | static_cast(b)); } static constexpr double bit_xor(double a, double b) { return static_cast(static_cast(a) ^ static_cast(b)); } static double max(double a, double b) { if (std::isnan(a)) return a; if (std::isnan(b)) return b; if (a == b) return std::signbit(a) ? b : a; // treat +0 as larger than -0 return a > b ? a : b; } static double min(double a, double b) { if (std::isnan(a)) return a; if (std::isnan(b)) return b; if (a == b) return std::signbit(a) ? a : b; // treat -0 as smaller than +0 return a < b ? a : b; } static double maximum(const std::vector &args) { double ret = -std::numeric_limits::infinity(); for (auto a : args) ret = max(ret, a); return ret; } static double minimum(const std::vector &args) { double ret = std::numeric_limits::infinity(); for (auto a : args) ret = min(ret, a); return ret; } struct te_builtin { const wchar_t *name; te_fun_t fn; }; static constexpr te_builtin functions[] = { /* must be in alphabetical order */ // clang-format off {L"abs", std::fabs}, {L"acos", std::acos}, {L"asin", std::asin}, {L"atan", std::atan}, {L"atan2", std::atan2}, {L"bitand", bit_and}, {L"bitor", bit_or}, {L"bitxor", bit_xor}, {L"ceil", std::ceil}, {L"cos", std::cos}, {L"cosh", std::cosh}, {L"e", M_E}, {L"exp", std::exp}, {L"fac", fac}, {L"floor", std::floor}, {L"ln", std::log}, {L"log", std::log10}, {L"log10", std::log10}, {L"log2", std::log2}, {L"max", maximum}, {L"min", minimum}, {L"ncr", ncr}, {L"npr", npr}, {L"pi", M_PI}, {L"pow", std::pow}, {L"round", std::round}, {L"sin", std::sin}, {L"sinh", std::sinh}, {L"sqrt", std::sqrt}, {L"tan", std::tan}, {L"tanh", std::tanh}, {L"tau", 2 * M_PI}, // clang-format on }; ASSERT_SORTED_BY_NAME(functions); static const te_builtin *find_builtin(const wchar_t *name, int len) { const auto end = std::end(functions); const te_builtin *found = std::lower_bound(std::begin(functions), end, name, [len](const te_builtin &lhs, const wchar_t *rhs) { // The length is important because that's where // the parens start return std::wcsncmp(lhs.name, rhs, len) < 0; }); // We need to compare again because we might have gotten the first "larger" element. if (found != end && std::wcsncmp(found->name, name, len) == 0 && found->name[len] == 0) return found; return nullptr; } static constexpr double add(double a, double b) { return a + b; } static constexpr double sub(double a, double b) { return a - b; } static constexpr double mul(double a, double b) { return a * b; } static constexpr double divide(double a, double b) { // If b isn't zero, divide. // If a isn't zero, return signed INFINITY. // Else, return NAN. return b ? a / b : a ? copysign(1, a) * copysign(1, b) * INFINITY : NAN; } void state::next_token() { type_ = TOK_NULL; do { if (!*next_) { type_ = TOK_END; return; } /* Try reading a number. */ if ((next_[0] >= '0' && next_[0] <= '9') || next_[0] == '.') { current_ = fish_wcstod_underscores(next_, const_cast(&next_)); type_ = TOK_NUMBER; } else { /* Look for a function call. */ // But not when it's an "x" followed by whitespace // - that's the alternative multiplication operator. if (next_[0] >= 'a' && next_[0] <= 'z' && !(next_[0] == 'x' && isspace(next_[1]))) { const wchar_t *start = next_; while ((next_[0] >= 'a' && next_[0] <= 'z') || (next_[0] >= '0' && next_[0] <= '9') || (next_[0] == '_')) next_++; const te_builtin *var = find_builtin(start, next_ - start); if (var) { type_ = TOK_FUNCTION; current_ = var->fn; } else if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) { // Our error is more specific, so it takes precedence. type_ = TOK_ERROR; error_ = TE_ERROR_UNKNOWN_FUNCTION; errpos_ = start + 1; errlen_ = next_ - start; } } else { /* Look for an operator or special character. */ switch (next_++[0]) { case '+': type_ = TOK_INFIX; current_ = add; break; case '-': type_ = TOK_INFIX; current_ = sub; break; case 'x': case '*': // We've already checked for whitespace above. type_ = TOK_INFIX; current_ = mul; break; case '/': type_ = TOK_INFIX; current_ = divide; break; case '^': type_ = TOK_INFIX; current_ = pow; break; case '%': type_ = TOK_INFIX; current_ = fmod; break; case '(': type_ = TOK_OPEN; break; case ')': type_ = TOK_CLOSE; break; case ',': type_ = TOK_SEP; break; case ' ': case '\t': case '\n': case '\r': break; case '=': case '>': case '<': case '&': case '|': case '!': type_ = TOK_ERROR; error_ = TE_ERROR_LOGICAL_OPERATOR; break; default: type_ = TOK_ERROR; error_ = TE_ERROR_MISSING_OPERATOR; break; } } } } while (type_ == TOK_NULL); } double state::base() { /* = | {"(" ")"} | | * "(" {"," } ")" | "(" ")" */ auto next = next_; switch (type_) { case TOK_NUMBER: { auto val = current_(); next_token(); if (type_ == TOK_NUMBER || type_ == TOK_FUNCTION) { // Two numbers after each other: // math '5 2' // math '3 pi' // (of course 3 pi could also be interpreted as 3 x pi) type_ = TOK_ERROR; error_ = TE_ERROR_MISSING_OPERATOR; // The error should be given *between* // the last two tokens. errpos_ = next + 1; // Go to the end of whitespace. while (wcschr(L" \t\n\r", next++[0])); errlen_ = next - errpos_; } return val; } case TOK_FUNCTION: { auto fn = current_; int arity = fn.arity(); next_token(); const bool have_open = type_ == TOK_OPEN; if (have_open) { // If we *have* an opening parenthesis, // we need to consume it and // expect a closing one. next_token(); } if (arity == 0) { if (have_open) { if (type_ == TOK_CLOSE) { next_token(); } else if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) { type_ = TOK_ERROR; error_ = TE_ERROR_MISSING_CLOSING_PAREN; break; } } return fn(); } std::vector parameters; int i; for (i = 0; arity < 0 || i < arity; i++) { parameters.push_back(expr()); if (type_ != TOK_SEP) { break; } next_token(); } if (arity < 0 || i == arity - 1) { if (!have_open) { return fn(parameters); } if (type_ == TOK_CLOSE) { // We have an opening and a closing paren, consume the closing one and done. next_token(); return fn(parameters); } if (type_ != TOK_ERROR) { // If we had the right number of arguments, we're missing a closing paren. error_ = TE_ERROR_MISSING_CLOSING_PAREN; type_ = TOK_ERROR; } } if (type_ != TOK_ERROR || error_ == TE_ERROR_UNEXPECTED_TOKEN) { // Otherwise we complain about the number of arguments *first*, // a closing parenthesis should be more obvious. error_ = i < arity ? TE_ERROR_TOO_FEW_ARGS : TE_ERROR_TOO_MANY_ARGS; type_ = TOK_ERROR; } break; } case TOK_OPEN: { next_token(); auto ret = expr(); if (type_ == TOK_CLOSE) { next_token(); return ret; } if (type_ != TOK_ERROR && type_ != TOK_END && error_ == TE_ERROR_NONE) { type_ = TOK_ERROR; error_ = TE_ERROR_TOO_MANY_ARGS; } else if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) { type_ = TOK_ERROR; error_ = TE_ERROR_MISSING_CLOSING_PAREN; } break; } case TOK_END: // The expression ended before we expected it. // e.g. `2 - `. // This means we have too few things. // Instead of introducing another error, just call it // "too few args". type_ = TOK_ERROR; error_ = TE_ERROR_TOO_FEW_ARGS; break; default: if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) { type_ = TOK_ERROR; error_ = TE_ERROR_UNEXPECTED_TOKEN; } break; } return NAN; } double state::power() { /* = {("-" | "+")} */ int sign = 1; while (type_ == TOK_INFIX && (current_ == add || current_ == sub)) { if (current_ == sub) sign = -sign; next_token(); } return sign * base(); } double state::factor() { /* = {"^" } */ auto ret = power(); if (type_ == TOK_INFIX && current_ == pow) { next_token(); ret = pow(ret, factor()); } return ret; } double state::term() { /* = {("*" | "/" | "%") } */ auto ret = factor(); while (type_ == TOK_INFIX && (current_ == mul || current_ == divide || current_ == fmod)) { auto fn = current_; auto tok = next_; next_token(); auto ret2 = factor(); if (ret2 == 0 && (fn == divide || fn == fmod)) { // Division by zero (also for modulo) type_ = TOK_ERROR; error_ = TE_ERROR_DIV_BY_ZERO; // Error position is the "/" or "%" sign for now errpos_ = tok; errlen_ = 1; } ret = fn(ret, ret2); } return ret; } double state::expr() { /* = {("+" | "-") } */ auto ret = term(); while (type_ == TOK_INFIX && (current_ == add || current_ == sub)) { auto fn = current_; next_token(); ret = fn(ret, term()); } return ret; } double te_interp(const wchar_t *expression, te_error_t *error) { state s{expression}; double ret = s.eval(); if (error) *error = s.error(); return ret; }