diff --git a/src/tinyexpr.cpp b/src/tinyexpr.cpp index 210f6b691..61b0f8163 100644 --- a/src/tinyexpr.cpp +++ b/src/tinyexpr.cpp @@ -27,29 +27,68 @@ #include #include -#include -#include #include #include -#include #include -#include +#include +#include "common.h" #include "fallback.h" // IWYU pragma: keep #include "wutil.h" -// TODO: It would be nice not to rely on a typedef for this, especially one that can only do -// functions with two args. -using te_fun2 = double (*)(double, double); -using te_fun1 = double (*)(double); -using te_fun0 = double (*)(); +struct te_fun_t { + using fn_2 = double (*)(double, double); + using fn_1 = double (*)(double); + using fn_0 = double (*)(); -enum { - TE_CONSTANT = 0, - TE_FUNCTION0, - TE_FUNCTION1, - TE_FUNCTION2, + te_fun_t(double val) : type_{CONSTANT}, arity_{0}, value{val} {} + te_fun_t(fn_0 fn) : type_{FUNCTION}, arity_{0}, fun0{fn} {} + te_fun_t(fn_1 fn) : type_{FUNCTION}, arity_{1}, fun1{fn} {} + te_fun_t(fn_2 fn) : type_{FUNCTION}, arity_{2}, fun2{fn} {} + + bool operator==(fn_2 fn) const { return arity_ == 2 && fun2 == fn; } + + [[nodiscard]] int arity() const { return arity_; } + + double operator()() const { + assert(arity_ == 0); + return type_ == CONSTANT ? value : fun0(); + } + + double operator()(double a, double b) const { + assert(arity_ == 2); + return fun2(a, b); + } + + double operator()(const std::vector &args) const { + if (arity_ != static_cast(args.size())) return NAN; + switch (arity_) { + case 0: + return type_ == CONSTANT ? value : fun0(); + case 1: + return fun1(args[0]); + case 2: + return fun2(args[0], args[1]); + } + return NAN; + } + + private: + enum { + CONSTANT, + FUNCTION, + } type_; + int arity_; + union { + double value; + fn_0 fun0; + fn_1 fun1; + fn_2 fun2; + }; +}; + +enum te_state_type_t { TOK_NULL, TOK_ERROR, TOK_END, @@ -57,96 +96,42 @@ enum { TOK_OPEN, TOK_CLOSE, TOK_NUMBER, + TOK_FUNCTION, TOK_INFIX }; -static int get_arity(const int type) { - if (type == TE_FUNCTION2) return 2; - if (type == TE_FUNCTION1) return 1; - return 0; -} - -struct te_expr_t { - int type; - union { - double value; - te_fun0 fun0; - te_fun1 fun1; - te_fun2 fun2; - }; - te_expr_t *parameters[]; -}; - -struct te_builtin { - const wchar_t *name; - void *address; - int type; -}; - struct state { - explicit state(const wchar_t *expr) : start{expr}, next{expr} { next_token(); } - te_expr_t *expr(); + explicit state(const wchar_t *expr) : start_{expr}, next_{expr} { next_token(); } + double eval() { return expr(); } - union { - double value; - void *function; - }; - const wchar_t *start; - const wchar_t *next; - int type; - te_error_type_t error{TE_ERROR_NONE}; + [[nodiscard]] te_error_t error() const { + if (type_ == TOK_END) return {TE_ERROR_NONE, 0}; + te_error_t err{error_, static_cast(next_ - start_) + 1}; + if (error_ == TE_ERROR_NONE) { + // If we're not at the end but there's no error, then that means we have a + // superfluous token that we have no idea what to do with. + err.type = TE_ERROR_TOO_MANY_ARGS; + } + return err; + } private: + te_state_type_t type_{TOK_NULL}; + te_error_type_t error_{TE_ERROR_NONE}; + + const wchar_t *start_; + const wchar_t *next_; + + te_fun_t current_{NAN}; void next_token(); - te_expr_t *power(); - te_expr_t *base(); - te_expr_t *factor(); - te_expr_t *term(); + double expr(); + double power(); + double base(); + double factor(); + double term(); }; -// TODO: That move there? Ouch. Replace with a proper class with a constructor. -#define NEW_EXPR(type, ...) new_expr((type), std::move((const te_expr_t *[]){__VA_ARGS__})) - -static te_expr_t *new_expr(const int type, const te_expr_t *parameters[]) { - const int arity = get_arity(type); - const int psize = sizeof(te_expr_t *) * arity; - const int size = sizeof(te_expr_t) + psize; - auto ret = static_cast(malloc(size)); - // This sets float to 0, which depends on the implementation. - // We rely on IEEE-754 floats anyway, so it's okay. - std::memset(ret, 0, size); - if (arity && parameters) { - std::memcpy(ret->parameters, parameters, psize); - } - ret->type = type; - return ret; -} - -/* Frees the expression. */ -/* This is safe to call on NULL pointers. */ -static void te_free(te_expr_t *n); - -static void te_free_parameters(te_expr_t *n) { - if (!n) return; - int arity = get_arity(n->type); - // Free all parameters from the back to the front. - while (arity > 0) { - te_free(n->parameters[arity - 1]); - arity--; - } -} - -static void te_free(te_expr_t *n) { - if (!n) return; - te_free_parameters(n); - free(n); -} - -static constexpr double pi() { return M_PI; } -static constexpr double tau() { return 2 * M_PI; } -static constexpr double e() { return M_E; } - static double fac(double a) { /* simplest version of fac */ if (a < 0.0) return NAN; if (a > UINT_MAX) return INFINITY; @@ -203,40 +188,47 @@ static double min(double a, double b) { return a < b ? a : b; } +struct te_builtin { + const wchar_t *name; + te_fun_t fn; +}; + static const te_builtin functions[] = { /* must be in alphabetical order */ - {L"abs", reinterpret_cast(static_cast(std::fabs)), TE_FUNCTION1}, - {L"acos", reinterpret_cast(static_cast(std::acos)), TE_FUNCTION1}, - {L"asin", reinterpret_cast(static_cast(std::asin)), TE_FUNCTION1}, - {L"atan", reinterpret_cast(static_cast(std::atan)), TE_FUNCTION1}, - {L"atan2", reinterpret_cast(static_cast(std::atan2)), TE_FUNCTION2}, - {L"bitand", reinterpret_cast(static_cast(bit_and)), TE_FUNCTION2}, - {L"bitor", reinterpret_cast(static_cast(bit_or)), TE_FUNCTION2}, - {L"bitxor", reinterpret_cast(static_cast(bit_xor)), TE_FUNCTION2}, - {L"ceil", reinterpret_cast(static_cast(std::ceil)), TE_FUNCTION1}, - {L"cos", reinterpret_cast(static_cast(std::cos)), TE_FUNCTION1}, - {L"cosh", reinterpret_cast(static_cast(std::cosh)), TE_FUNCTION1}, - {L"e", reinterpret_cast(static_cast(e)), TE_FUNCTION0}, - {L"exp", reinterpret_cast(static_cast(std::exp)), TE_FUNCTION1}, - {L"fac", reinterpret_cast(static_cast(fac)), TE_FUNCTION1}, - {L"floor", reinterpret_cast(static_cast(std::floor)), TE_FUNCTION1}, - {L"ln", reinterpret_cast(static_cast(std::log)), TE_FUNCTION1}, - {L"log", reinterpret_cast(static_cast(std::log10)), TE_FUNCTION1}, - {L"log10", reinterpret_cast(static_cast(std::log10)), TE_FUNCTION1}, - {L"log2", reinterpret_cast(static_cast(std::log2)), TE_FUNCTION1}, - {L"max", reinterpret_cast(static_cast(max)), TE_FUNCTION2}, - {L"min", reinterpret_cast(static_cast(min)), TE_FUNCTION2}, - {L"ncr", reinterpret_cast(static_cast(ncr)), TE_FUNCTION2}, - {L"npr", reinterpret_cast(static_cast(npr)), TE_FUNCTION2}, - {L"pi", reinterpret_cast(static_cast(pi)), TE_FUNCTION0}, - {L"pow", reinterpret_cast(static_cast(std::pow)), TE_FUNCTION2}, - {L"round", reinterpret_cast(static_cast(std::round)), TE_FUNCTION1}, - {L"sin", reinterpret_cast(static_cast(std::sin)), TE_FUNCTION1}, - {L"sinh", reinterpret_cast(static_cast(std::sinh)), TE_FUNCTION1}, - {L"sqrt", reinterpret_cast(static_cast(std::sqrt)), TE_FUNCTION1}, - {L"tan", reinterpret_cast(static_cast(std::tan)), TE_FUNCTION1}, - {L"tanh", reinterpret_cast(static_cast(std::tanh)), TE_FUNCTION1}, - {L"tau", reinterpret_cast(static_cast(tau)), TE_FUNCTION0}, + // clang-format off + {L"abs", std::fabs}, + {L"acos", std::acos}, + {L"asin", std::asin}, + {L"atan", std::atan}, + {L"atan2", std::atan2}, + {L"bitand", bit_and}, + {L"bitor", bit_or}, + {L"bitxor", bit_xor}, + {L"ceil", std::ceil}, + {L"cos", std::cos}, + {L"cosh", std::cosh}, + {L"e", M_E}, + {L"exp", std::exp}, + {L"fac", fac}, + {L"floor", std::floor}, + {L"ln", std::log}, + {L"log", std::log10}, + {L"log10", std::log10}, + {L"log2", std::log2}, + {L"max", max}, + {L"min", min}, + {L"ncr", ncr}, + {L"npr", npr}, + {L"pi", M_PI}, + {L"pow", std::pow}, + {L"round", std::round}, + {L"sin", std::sin}, + {L"sinh", std::sinh}, + {L"sqrt", std::sqrt}, + {L"tan", std::tan}, + {L"tanh", std::tanh}, + {L"tau", 2 * M_PI}, + // clang-format on }; static const te_builtin *find_builtin(const wchar_t *name, int len) { @@ -263,86 +255,76 @@ static constexpr double divide(double a, double b) { return b ? a / b : a ? copysign(1, a) * copysign(1, b) * INFINITY : NAN; } -static constexpr double negate(double a) { return -a; } - void state::next_token() { - type = TOK_NULL; + type_ = TOK_NULL; do { - if (!*next) { - type = TOK_END; + if (!*next_) { + type_ = TOK_END; return; } /* Try reading a number. */ - if ((next[0] >= '0' && next[0] <= '9') || next[0] == '.') { - value = fish_wcstod(next, const_cast(&next)); - type = TOK_NUMBER; + if ((next_[0] >= '0' && next_[0] <= '9') || next_[0] == '.') { + current_ = fish_wcstod(next_, const_cast(&next_)); + type_ = TOK_NUMBER; } else { /* Look for a function call. */ // But not when it's an "x" followed by whitespace // - that's the alternative multiplication operator. - if (next[0] >= 'a' && next[0] <= 'z' && !(next[0] == 'x' && isspace(next[1]))) { - const wchar_t *start; - start = next; - while ((next[0] >= 'a' && next[0] <= 'z') || (next[0] >= '0' && next[0] <= '9') || - (next[0] == '_')) - next++; + if (next_[0] >= 'a' && next_[0] <= 'z' && !(next_[0] == 'x' && isspace(next_[1]))) { + const wchar_t *start = next_; + while ((next_[0] >= 'a' && next_[0] <= 'z') || + (next_[0] >= '0' && next_[0] <= '9') || (next_[0] == '_')) + next_++; - const te_builtin *var = find_builtin(start, next - start); + const te_builtin *var = find_builtin(start, next_ - start); if (var) { - switch (var->type) { - case TE_FUNCTION0: - case TE_FUNCTION1: - case TE_FUNCTION2: - type = var->type; - function = var->address; - break; - } - } else if (type != TOK_ERROR || error == TE_ERROR_UNKNOWN) { + type_ = TOK_FUNCTION; + current_ = var->fn; + } else if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) { // Our error is more specific, so it takes precedence. - type = TOK_ERROR; - error = TE_ERROR_UNKNOWN_FUNCTION; + type_ = TOK_ERROR; + error_ = TE_ERROR_UNKNOWN_FUNCTION; } } else { /* Look for an operator or special character. */ - switch (next++[0]) { - // The "te_fun2" casts are necessary to pick the right overload. + switch (next_++[0]) { case '+': - type = TOK_INFIX; - function = reinterpret_cast(static_cast(add)); + type_ = TOK_INFIX; + current_ = add; break; case '-': - type = TOK_INFIX; - function = reinterpret_cast(static_cast(sub)); + type_ = TOK_INFIX; + current_ = sub; break; case 'x': case '*': // We've already checked for whitespace above. - type = TOK_INFIX; - function = reinterpret_cast(static_cast(mul)); + type_ = TOK_INFIX; + current_ = mul; break; case '/': - type = TOK_INFIX; - function = reinterpret_cast(static_cast(divide)); + type_ = TOK_INFIX; + current_ = divide; break; case '^': - type = TOK_INFIX; - function = reinterpret_cast(static_cast(pow)); + type_ = TOK_INFIX; + current_ = pow; break; case '%': - type = TOK_INFIX; - function = reinterpret_cast(static_cast(fmod)); + type_ = TOK_INFIX; + current_ = fmod; break; case '(': - type = TOK_OPEN; + type_ = TOK_OPEN; break; case ')': - type = TOK_CLOSE; + type_ = TOK_CLOSE; break; case ',': - type = TOK_SEP; + type_ = TOK_SEP; break; case ' ': case '\t': @@ -355,124 +337,121 @@ void state::next_token() { case '&': case '|': case '!': - type = TOK_ERROR; - error = TE_ERROR_LOGICAL_OPERATOR; + type_ = TOK_ERROR; + error_ = TE_ERROR_LOGICAL_OPERATOR; break; default: - type = TOK_ERROR; - error = TE_ERROR_MISSING_OPERATOR; + type_ = TOK_ERROR; + error_ = TE_ERROR_MISSING_OPERATOR; break; } } } - } while (type == TOK_NULL); + } while (type_ == TOK_NULL); } -te_expr_t *state::base() { +double state::base() { /* = | {"(" ")"} | | * "(" {"," } ")" | "(" ")" */ - te_expr_t *ret; - int arity; - auto previous = start; - auto next = this->next; - switch (type) { - case TOK_NUMBER: - ret = new_expr(TE_CONSTANT, nullptr); - ret->value = value; + auto previous = start_; + auto next = next_; + switch (type_) { + case TOK_NUMBER: { + auto val = current_(); next_token(); - if (type == TOK_NUMBER || type == TE_FUNCTION0) { + if (type_ == TOK_NUMBER || type_ == TOK_FUNCTION) { // Two numbers after each other: // math '5 2' // math '3 pi' // (of course 3 pi could also be interpreted as 3 x pi) - type = TOK_ERROR; - error = TE_ERROR_MISSING_OPERATOR; + type_ = TOK_ERROR; + error_ = TE_ERROR_MISSING_OPERATOR; // The error should be given *between* // the last two tokens. // Since these are two separate numbers there is at least // one space between. - start = previous; - this->next = next + 1; + start_ = previous; + next_ = next + 1; } - break; + return val; + } - case TE_FUNCTION0: - ret = new_expr(type, nullptr); - ret->fun0 = reinterpret_cast(function); - next_token(); - if (type == TOK_OPEN) { - next_token(); - if (type == TOK_CLOSE) { - next_token(); - } else if (type != TOK_ERROR || error == TE_ERROR_UNKNOWN) { - type = TOK_ERROR; - error = TE_ERROR_MISSING_CLOSING_PAREN; - } - } - break; - - case TE_FUNCTION1: - case TE_FUNCTION2: { - arity = get_arity(type); - - ret = new_expr(type, nullptr); - ret->fun0 = reinterpret_cast(function); + case TOK_FUNCTION: { + auto fn = current_; + int arity = fn.arity(); next_token(); - bool have_open = false; - if (type == TOK_OPEN) { + const bool have_open = type_ == TOK_OPEN; + if (have_open) { // If we *have* an opening parenthesis, // we need to consume it and // expect a closing one. - have_open = true; next_token(); } + if (arity == 0) { + if (have_open) { + if (type_ == TOK_CLOSE) { + next_token(); + } else if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) { + type_ = TOK_ERROR; + error_ = TE_ERROR_MISSING_CLOSING_PAREN; + break; + } + } + return fn(); + } + + std::vector parameters; int i; for (i = 0; i < arity; i++) { - ret->parameters[i] = expr(); - if (type != TOK_SEP) { + parameters.push_back(expr()); + if (type_ != TOK_SEP) { break; } next_token(); } if (!have_open && i == arity - 1) { - break; + return fn(parameters); } - if (have_open && type == TOK_CLOSE && i == arity - 1) { + if (have_open && type_ == TOK_CLOSE && i == arity - 1) { // We have an opening and a closing paren, consume the closing one and done. next_token(); - } else if (type != TOK_ERROR || error == TE_ERROR_UNEXPECTED_TOKEN) { + return fn(parameters); + } + if (type_ != TOK_ERROR || error_ == TE_ERROR_UNEXPECTED_TOKEN) { // If we had the right number of arguments, we're missing a closing paren. - if (have_open && i == arity - 1 && type != TOK_ERROR) { - error = TE_ERROR_MISSING_CLOSING_PAREN; + if (have_open && i == arity - 1 && type_ != TOK_ERROR) { + error_ = TE_ERROR_MISSING_CLOSING_PAREN; } else { // Otherwise we complain about the number of arguments *first*, // a closing parenthesis should be more obvious. - error = i < arity ? TE_ERROR_TOO_FEW_ARGS : TE_ERROR_TOO_MANY_ARGS; + error_ = i < arity ? TE_ERROR_TOO_FEW_ARGS : TE_ERROR_TOO_MANY_ARGS; } - type = TOK_ERROR; + type_ = TOK_ERROR; } - break; } - case TOK_OPEN: + case TOK_OPEN: { next_token(); - ret = expr(); - if (type == TOK_CLOSE) { + auto ret = expr(); + if (type_ == TOK_CLOSE) { next_token(); - } else if (type != TOK_ERROR && type != TOK_END && error == TE_ERROR_NONE) { - type = TOK_ERROR; - error = TE_ERROR_TOO_MANY_ARGS; - } else if (type != TOK_ERROR || error == TE_ERROR_UNKNOWN) { - type = TOK_ERROR; - error = TE_ERROR_MISSING_CLOSING_PAREN; + return ret; + } + if (type_ != TOK_ERROR && type_ != TOK_END && error_ == TE_ERROR_NONE) { + type_ = TOK_ERROR; + error_ = TE_ERROR_TOO_MANY_ARGS; + } else if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) { + type_ = TOK_ERROR; + error_ = TE_ERROR_MISSING_CLOSING_PAREN; } break; + } case TOK_END: // The expression ended before we expected it. @@ -480,178 +459,67 @@ te_expr_t *state::base() { // This means we have too few things. // Instead of introducing another error, just call it // "too few args". - ret = new_expr(0, nullptr); - type = TOK_ERROR; - error = TE_ERROR_TOO_FEW_ARGS; - ret->value = NAN; + type_ = TOK_ERROR; + error_ = TE_ERROR_TOO_FEW_ARGS; break; default: - ret = new_expr(0, nullptr); - if (type != TOK_ERROR || error == TE_ERROR_UNKNOWN) { - type = TOK_ERROR; - error = TE_ERROR_UNEXPECTED_TOKEN; + if (type_ != TOK_ERROR || error_ == TE_ERROR_UNKNOWN) { + type_ = TOK_ERROR; + error_ = TE_ERROR_UNEXPECTED_TOKEN; } - ret->value = NAN; break; } - return ret; + return NAN; } -te_expr_t *state::power() { +double state::power() { /* = {("-" | "+")} */ int sign = 1; - while (type == TOK_INFIX && (function == add || function == sub)) { - if (function == sub) sign = -sign; + while (type_ == TOK_INFIX && (current_ == add || current_ == sub)) { + if (current_ == sub) sign = -sign; next_token(); } - - te_expr_t *ret; - - if (sign == 1) { - ret = base(); - } else { - ret = NEW_EXPR(TE_FUNCTION1, base()); - ret->fun1 = negate; - } - - return ret; + return sign * base(); } -te_expr_t *state::factor() { +double state::factor() { /* = {"^" } */ - te_expr_t *ret = power(); + auto ret = power(); - te_expr_t *insertion = nullptr; - - while (type == TOK_INFIX && (function == reinterpret_cast(static_cast(pow)))) { - auto t = reinterpret_cast(function); + if (type_ == TOK_INFIX && current_ == pow) { next_token(); - - if (insertion) { - /* Make exponentiation go right-to-left. */ - te_expr_t *insert = NEW_EXPR(TE_FUNCTION2, insertion->parameters[1], power()); - insert->fun2 = t; - insertion->parameters[1] = insert; - insertion = insert; - } else { - ret = NEW_EXPR(TE_FUNCTION2, ret, power()); - ret->fun2 = t; - insertion = ret; - } + ret = pow(ret, factor()); } return ret; } -te_expr_t *state::term() { +double state::term() { /* = {("*" | "/" | "%") } */ - te_expr_t *ret = factor(); - - while (type == TOK_INFIX && - (function == reinterpret_cast(static_cast(mul)) || - function == reinterpret_cast(static_cast(divide)) || - function == reinterpret_cast(static_cast(fmod)))) { - auto t = reinterpret_cast(function); + auto ret = factor(); + while (type_ == TOK_INFIX && (current_ == mul || current_ == divide || current_ == fmod)) { + auto fn = current_; next_token(); - ret = NEW_EXPR(TE_FUNCTION2, ret, factor()); - ret->fun2 = t; + ret = fn(ret, factor()); } - return ret; } -te_expr_t *state::expr() { +double state::expr() { /* = {("+" | "-") } */ - te_expr_t *ret = term(); - - while (type == TOK_INFIX && (function == add || function == sub)) { - auto t = reinterpret_cast(function); + auto ret = term(); + while (type_ == TOK_INFIX && (current_ == add || current_ == sub)) { + auto fn = current_; next_token(); - ret = NEW_EXPR(TE_FUNCTION2, ret, term()); - ret->fun2 = t; + ret = fn(ret, term()); } - return ret; } -#define M(e) te_eval(n->parameters[e]) - -/* Evaluates the expression. */ -static double te_eval(const te_expr_t *n) { - if (!n) return NAN; - - switch (n->type) { - case TE_CONSTANT: - return n->value; - case TE_FUNCTION0: - return n->fun0(); - case TE_FUNCTION1: - return n->fun1(M(0)); - case TE_FUNCTION2: - return n->fun2(M(0), M(1)); - default: - return NAN; - } -} - -#undef M - -static void optimize(te_expr_t *n) { - /* Evaluates as much as possible. */ - if (!n || n->type == TE_CONSTANT) return; - - const int arity = get_arity(n->type); - bool known = true; - for (int i = 0; i < arity; ++i) { - optimize(n->parameters[i]); - if ((n->parameters[i])->type != TE_CONSTANT) { - known = false; - } - } - if (known) { - const double value = te_eval(n); - te_free_parameters(n); - n->type = TE_CONSTANT; - n->value = value; - } -} - -/* Parses the input expression. */ -/* Returns NULL on error. */ -static te_expr_t *te_compile(const wchar_t *expression, te_error_t *error) { - state s{expression}; - - te_expr_t *root = s.expr(); - - if (s.type != TOK_END) { - te_free(root); - if (error) { - error->position = (s.next - s.start) + 1; - if (s.error != TE_ERROR_NONE) { - error->type = s.error; - } else { - // If we're not at the end but there's no error, then that means we have a - // superfluous token that we have no idea what to do with. - error->type = TE_ERROR_TOO_MANY_ARGS; - } - } - return nullptr; - } else { - optimize(root); - if (error) error->position = 0; - return root; - } -} - double te_interp(const wchar_t *expression, te_error_t *error) { - te_expr_t *n = te_compile(expression, error); - double ret; - if (n) { - ret = te_eval(n); - te_free(n); - } else { - ret = NAN; - } + state s{expression}; + double ret = s.eval(); + if (error) *error = s.error(); return ret; }