fish-shell/src/tinyexpr.c
Fabian Homborg f60e1549a9 [math] Better error for 2 + 2 4
This now reports "TOO_MANY_ARGS" instead of no error (and triggering
an assertion).

We might want to add a new error type or report the missing operator
before, but this is okay for now.
2018-03-01 13:09:35 +01:00

573 lines
18 KiB
C
Executable file

/*
* TINYEXPR - Tiny recursive descent parser and evaluation engine in C
*
* Copyright (c) 2015, 2016 Lewis Van Winkle
*
* http://CodePlea.com
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgement in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
// This version has been altered for inclusion in fish.
#include "tinyexpr.h"
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <stdio.h>
#include <limits.h>
typedef double (*te_fun2)(double, double);
enum {
TOK_NULL = TE_FUNCTION0+16, TOK_ERROR, TOK_END, TOK_SEP,
TOK_OPEN, TOK_CLOSE, TOK_NUMBER, TOK_VARIABLE, TOK_INFIX
};
enum {TE_CONSTANT = 1};
typedef struct state {
const char *start;
const char *next;
int type;
union {double value; const double *bound; const void *function;};
void *context;
const te_variable *lookup;
int lookup_len;
te_error_type_t error;
} state;
#define TYPE_MASK(TYPE) ((TYPE)&0x0000001F)
#define IS_PURE(TYPE) (((TYPE) & TE_FLAG_PURE) != 0)
#define IS_FUNCTION(TYPE) (((TYPE) & TE_FUNCTION0) != 0)
#define ARITY(TYPE) ( ((TYPE) & TE_FUNCTION0) ? ((TYPE) & 0x00000007) : 0 )
#define NEW_EXPR(type, ...) new_expr((type), (const te_expr*[]){__VA_ARGS__})
static te_expr *new_expr(const int type, const te_expr *parameters[]) {
const int arity = ARITY(type);
const int psize = sizeof(void*) * arity;
const int size = (sizeof(te_expr) - sizeof(void*)) + psize;
te_expr *ret = malloc(size);
memset(ret, 0, size);
if (arity && parameters) {
memcpy(ret->parameters, parameters, psize);
}
ret->type = type;
ret->bound = 0;
return ret;
}
void te_free_parameters(te_expr *n) {
if (!n) return;
switch (TYPE_MASK(n->type)) {
case TE_FUNCTION7: te_free(n->parameters[6]);
case TE_FUNCTION6: te_free(n->parameters[5]);
case TE_FUNCTION5: te_free(n->parameters[4]);
case TE_FUNCTION4: te_free(n->parameters[3]);
case TE_FUNCTION3: te_free(n->parameters[2]);
case TE_FUNCTION2: te_free(n->parameters[1]);
case TE_FUNCTION1: te_free(n->parameters[0]);
}
}
void te_free(te_expr *n) {
if (!n) return;
te_free_parameters(n);
free(n);
}
static double pi() {return 3.14159265358979323846;}
static double e() {return 2.71828182845904523536;}
static double fac(double a) {/* simplest version of fac */
if (a < 0.0)
return NAN;
if (a > UINT_MAX)
return INFINITY;
unsigned int ua = (unsigned int)(a);
unsigned long int result = 1, i;
for (i = 1; i <= ua; i++) {
if (i > ULONG_MAX / result)
return INFINITY;
result *= i;
}
return (double)result;
}
static double ncr(double n, double r) {
if (n < 0.0 || r < 0.0 || n < r) return NAN;
if (n > UINT_MAX || r > UINT_MAX) return INFINITY;
unsigned long int un = (unsigned int)(n), ur = (unsigned int)(r), i;
unsigned long int result = 1;
if (ur > un / 2) ur = un - ur;
for (i = 1; i <= ur; i++) {
if (result > ULONG_MAX / (un - ur + i))
return INFINITY;
result *= un - ur + i;
result /= i;
}
return result;
}
static double npr(double n, double r) {return ncr(n, r) * fac(r);}
static const te_variable functions[] = {
/* must be in alphabetical order */
{"abs", fabs, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"acos", acos, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"asin", asin, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"atan", atan, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"atan2", atan2, TE_FUNCTION2 | TE_FLAG_PURE, 0},
{"ceil", ceil, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"cos", cos, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"cosh", cosh, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"e", e, TE_FUNCTION0 | TE_FLAG_PURE, 0},
{"exp", exp, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"fac", fac, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"floor", floor, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"ln", log, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"log", log10, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"log10", log10, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"ncr", ncr, TE_FUNCTION2 | TE_FLAG_PURE, 0},
{"npr", npr, TE_FUNCTION2 | TE_FLAG_PURE, 0},
{"pi", pi, TE_FUNCTION0 | TE_FLAG_PURE, 0},
{"pow", pow, TE_FUNCTION2 | TE_FLAG_PURE, 0},
{"sin", sin, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"sinh", sinh, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"sqrt", sqrt, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"tan", tan, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{"tanh", tanh, TE_FUNCTION1 | TE_FLAG_PURE, 0},
{0, 0, 0, 0}
};
static const te_variable *find_builtin(const char *name, int len) {
int imin = 0;
int imax = sizeof(functions) / sizeof(te_variable) - 2;
/*Binary search.*/
while (imax >= imin) {
const int i = (imin + ((imax-imin)/2));
int c = strncmp(name, functions[i].name, len);
if (!c) c = '\0' - functions[i].name[len];
if (c == 0) {
return functions + i;
} else if (c > 0) {
imin = i + 1;
} else {
imax = i - 1;
}
}
return 0;
}
static const te_variable *find_lookup(const state *s, const char *name, int len) {
int iters;
const te_variable *var;
if (!s->lookup) return 0;
for (var = s->lookup, iters = s->lookup_len; iters; ++var, --iters) {
if (strncmp(name, var->name, len) == 0 && var->name[len] == '\0') {
return var;
}
}
return 0;
}
static double add(double a, double b) {return a + b;}
static double sub(double a, double b) {return a - b;}
static double mul(double a, double b) {return a * b;}
static double divide(double a, double b) {return a / b;}
static double negate(double a) {return -a;}
void next_token(state *s) {
s->type = TOK_NULL;
do {
if (!*s->next){
s->type = TOK_END;
return;
}
/* Try reading a number. */
if ((s->next[0] >= '0' && s->next[0] <= '9') || s->next[0] == '.') {
s->value = strtod(s->next, (char**)&s->next);
s->type = TOK_NUMBER;
} else {
/* Look for a variable or builtin function call. */
if (s->next[0] >= 'a' && s->next[0] <= 'z') {
const char *start;
start = s->next;
while ((s->next[0] >= 'a' && s->next[0] <= 'z') || (s->next[0] >= '0' && s->next[0] <= '9') || (s->next[0] == '_')) s->next++;
const te_variable *var = find_lookup(s, start, s->next - start);
if (!var) var = find_builtin(start, s->next - start);
if (var) {
switch(TYPE_MASK(var->type))
{
case TE_VARIABLE:
s->type = TOK_VARIABLE;
s->bound = var->address;
break;
case TE_FUNCTION0: case TE_FUNCTION1: case TE_FUNCTION2: case TE_FUNCTION3:
case TE_FUNCTION4: case TE_FUNCTION5: case TE_FUNCTION6: case TE_FUNCTION7:
s->type = var->type;
s->function = var->address;
break;
}
} else if (s->type != TOK_ERROR
|| s->error == TE_ERROR_UNKNOWN) {
// Our error is more specific, so it takes precedence.
s->type = TOK_ERROR;
s->error = TE_ERROR_UNKNOWN_VARIABLE;
}
} else {
/* Look for an operator or special character. */
switch (s->next++[0]) {
case '+': s->type = TOK_INFIX; s->function = add; break;
case '-': s->type = TOK_INFIX; s->function = sub; break;
case '*': s->type = TOK_INFIX; s->function = mul; break;
case '/': s->type = TOK_INFIX; s->function = divide; break;
case '^': s->type = TOK_INFIX; s->function = pow; break;
case '%': s->type = TOK_INFIX; s->function = fmod; break;
case '(': s->type = TOK_OPEN; break;
case ')': s->type = TOK_CLOSE; break;
case ',': s->type = TOK_SEP; break;
case ' ': case '\t': case '\n': case '\r': break;
default: s->type = TOK_ERROR; s->error = TE_ERROR_MISSING_OPERATOR; break;
}
}
}
} while (s->type == TOK_NULL);
}
static te_expr *expr(state *s);
static te_expr *power(state *s);
static te_expr *base(state *s) {
/* <base> = <constant> | <variable> | <function-0> {"(" ")"} | <function-1> <power> | <function-X> "(" <expr> {"," <expr>} ")" | "(" <list> ")" */
te_expr *ret;
int arity;
switch (TYPE_MASK(s->type)) {
case TOK_NUMBER:
ret = new_expr(TE_CONSTANT, 0);
ret->value = s->value;
next_token(s);
break;
case TOK_VARIABLE:
ret = new_expr(TE_VARIABLE, 0);
ret->bound = s->bound;
next_token(s);
break;
case TE_FUNCTION0:
ret = new_expr(s->type, 0);
ret->function = s->function;
next_token(s);
if (s->type == TOK_OPEN) {
next_token(s);
if (s->type == TOK_CLOSE) {
next_token(s);
} else if (s->type != TOK_ERROR
|| s->error == TE_ERROR_UNKNOWN) {
s->type = TOK_ERROR;
s->error = TE_ERROR_MISSING_CLOSING_PAREN;
}
}
break;
case TE_FUNCTION1:
case TE_FUNCTION2: case TE_FUNCTION3: case TE_FUNCTION4:
case TE_FUNCTION5: case TE_FUNCTION6: case TE_FUNCTION7:
arity = ARITY(s->type);
ret = new_expr(s->type, 0);
ret->function = s->function;
next_token(s);
if (s->type == TOK_OPEN) {
int i;
for(i = 0; i < arity; i++) {
next_token(s);
ret->parameters[i] = expr(s);
if(s->type != TOK_SEP) {
break;
}
}
if(s->type == TOK_CLOSE && i == arity - 1) {
next_token(s);
} else if (s->type != TOK_ERROR
|| s->error == TE_ERROR_UNKNOWN) {
s->type = TOK_ERROR;
s->error = i < arity ? TE_ERROR_TOO_FEW_ARGS
: TE_ERROR_TOO_MANY_ARGS;
}
} else if (s->type != TOK_ERROR
|| s->error == TE_ERROR_UNKNOWN) {
s->type = TOK_ERROR;
s->error = TE_ERROR_MISSING_OPENING_PAREN;
}
break;
case TOK_OPEN:
next_token(s);
ret = expr(s);
if (s->type == TOK_CLOSE) {
next_token(s);
} else if (s->type != TOK_ERROR
|| s->error == TE_ERROR_UNKNOWN) {
s->type = TOK_ERROR;
s->error = TE_ERROR_MISSING_CLOSING_PAREN;
}
break;
case TOK_END:
// The expression ended before we expected it.
// e.g. `2 - `.
// This means we have too few things.
// Instead of introducing another error, just call it
// "too few args".
ret = new_expr(0, 0);
s->type = TOK_ERROR;
s->error = TE_ERROR_TOO_FEW_ARGS;
ret->value = NAN;
break;
default:
ret = new_expr(0, 0);
s->type = TOK_ERROR;
s->error = TE_ERROR_UNKNOWN;
ret->value = NAN;
break;
}
return ret;
}
static te_expr *power(state *s) {
/* <power> = {("-" | "+")} <base> */
int sign = 1;
while (s->type == TOK_INFIX && (s->function == add || s->function == sub)) {
if (s->function == sub) sign = -sign;
next_token(s);
}
te_expr *ret;
if (sign == 1) {
ret = base(s);
} else {
ret = NEW_EXPR(TE_FUNCTION1 | TE_FLAG_PURE, base(s));
ret->function = negate;
}
return ret;
}
static te_expr *factor(state *s) {
/* <factor> = <power> {"^" <power>} */
te_expr *ret = power(s);
while (s->type == TOK_INFIX && (s->function == pow)) {
te_fun2 t = s->function;
next_token(s);
ret = NEW_EXPR(TE_FUNCTION2 | TE_FLAG_PURE, ret, power(s));
ret->function = t;
}
return ret;
}
static te_expr *term(state *s) {
/* <term> = <factor> {("*" | "/" | "%") <factor>} */
te_expr *ret = factor(s);
while (s->type == TOK_INFIX && (s->function == mul || s->function == divide || s->function == fmod)) {
te_fun2 t = s->function;
next_token(s);
ret = NEW_EXPR(TE_FUNCTION2 | TE_FLAG_PURE, ret, factor(s));
ret->function = t;
}
return ret;
}
static te_expr *expr(state *s) {
/* <expr> = <term> {("+" | "-") <term>} */
te_expr *ret = term(s);
while (s->type == TOK_INFIX && (s->function == add || s->function == sub)) {
te_fun2 t = s->function;
next_token(s);
ret = NEW_EXPR(TE_FUNCTION2 | TE_FLAG_PURE, ret, term(s));
ret->function = t;
}
return ret;
}
#define TE_FUN(...) ((double(*)(__VA_ARGS__))n->function)
#define M(e) te_eval(n->parameters[e])
double te_eval(const te_expr *n) {
if (!n) return NAN;
switch(TYPE_MASK(n->type)) {
case TE_CONSTANT: return n->value;
case TE_VARIABLE: return *n->bound;
case TE_FUNCTION0: case TE_FUNCTION1: case TE_FUNCTION2: case TE_FUNCTION3:
case TE_FUNCTION4: case TE_FUNCTION5: case TE_FUNCTION6: case TE_FUNCTION7:
switch(ARITY(n->type)) {
case 0: return TE_FUN(void)();
case 1: return TE_FUN(double)(M(0));
case 2: return TE_FUN(double, double)(M(0), M(1));
case 3: return TE_FUN(double, double, double)(M(0), M(1), M(2));
case 4: return TE_FUN(double, double, double, double)(M(0), M(1), M(2), M(3));
case 5: return TE_FUN(double, double, double, double, double)(M(0), M(1), M(2), M(3), M(4));
case 6: return TE_FUN(double, double, double, double, double, double)(M(0), M(1), M(2), M(3), M(4), M(5));
case 7: return TE_FUN(double, double, double, double, double, double, double)(M(0), M(1), M(2), M(3), M(4), M(5), M(6));
default: return NAN;
}
default: return NAN;
}
}
#undef TE_FUN
#undef M
static void optimize(te_expr *n) {
/* Evaluates as much as possible. */
if (n->type == TE_CONSTANT) return;
if (n->type == TE_VARIABLE) return;
/* Only optimize out functions flagged as pure. */
if (IS_PURE(n->type)) {
const int arity = ARITY(n->type);
int known = 1;
int i;
for (i = 0; i < arity; ++i) {
optimize(n->parameters[i]);
if (((te_expr*)(n->parameters[i]))->type != TE_CONSTANT) {
known = 0;
}
}
if (known) {
const double value = te_eval(n);
te_free_parameters(n);
n->type = TE_CONSTANT;
n->value = value;
}
}
}
te_expr *te_compile(const char *expression, const te_variable *variables, int var_count, te_error_t *error) {
state s;
s.start = s.next = expression;
s.lookup = variables;
s.lookup_len = var_count;
s.error = TE_ERROR_NONE;
next_token(&s);
te_expr *root = expr(&s);
if (s.type != TOK_END) {
te_free(root);
if (error) {
error->position = (s.next - s.start) + 1;
if (s.error != TE_ERROR_NONE) {
error->type = s.error;
} else {
// If we're not at the end but there's no error, then that means we have a superfluous
// token that we have no idea what to do with.
// This occurs in e.g. `2 + 2 4` - the "4" is just not part of the expression.
// We can report either "too many arguments" or "expected operator", but the operator
// should be reported between the "2" and the "4".
// So we report TOO_MANY_ARGS on the "4".
error->type = TE_ERROR_TOO_MANY_ARGS;
}
}
return 0;
} else {
optimize(root);
if (error) error->position = 0;
return root;
}
}
double te_interp(const char *expression, te_error_t *error) {
te_expr *n = te_compile(expression, 0, 0, error);
double ret;
if (n) {
ret = te_eval(n);
te_free(n);
} else {
ret = NAN;
}
return ret;
}
static void pn (const te_expr *n, int depth) {
int i, arity;
printf("%*s", depth, "");
switch(TYPE_MASK(n->type)) {
case TE_CONSTANT: printf("%f\n", n->value); break;
case TE_VARIABLE: printf("bound %p\n", n->bound); break;
case TE_FUNCTION0: case TE_FUNCTION1: case TE_FUNCTION2: case TE_FUNCTION3:
case TE_FUNCTION4: case TE_FUNCTION5: case TE_FUNCTION6: case TE_FUNCTION7:
arity = ARITY(n->type);
printf("f%d", arity);
for(i = 0; i < arity; i++) {
printf(" %p", n->parameters[i]);
}
printf("\n");
for(i = 0; i < arity; i++) {
pn(n->parameters[i], depth + 1);
}
break;
}
}
void te_print(const te_expr *n) {
pn(n, 0);
}