Make parser functions members of state struct

Also some cleanup:
- removed unnecessary `typedef`s and `using`s
- removed unused TE_FUNCTION3
- separated function-pointer types by arity (`te_fun0`, `te_fun1`, `te_fun2`)
This commit is contained in:
Juho Eerola 2021-07-26 20:46:58 +03:00
parent e27456df24
commit 73bc453eaf
No known key found for this signature in database
GPG key ID: 552C980FFBBED60E

View file

@ -50,7 +50,6 @@ enum {
TE_FUNCTION0, TE_FUNCTION0,
TE_FUNCTION1, TE_FUNCTION1,
TE_FUNCTION2, TE_FUNCTION2,
TE_FUNCTION3,
TOK_NULL, TOK_NULL,
TOK_ERROR, TOK_ERROR,
TOK_END, TOK_END,
@ -62,28 +61,32 @@ enum {
}; };
static int get_arity(const int type) { static int get_arity(const int type) {
if (type == TE_FUNCTION3) return 3;
if (type == TE_FUNCTION2) return 2; if (type == TE_FUNCTION2) return 2;
if (type == TE_FUNCTION1) return 1; if (type == TE_FUNCTION1) return 1;
return 0; return 0;
} }
typedef struct te_expr { struct te_expr_t {
int type; int type;
union { union {
double value; double value;
void *function; te_fun0 fun0;
te_fun1 fun1;
te_fun2 fun2;
};
te_expr_t *parameters[];
}; };
te_expr *parameters[];
} te_expr;
using te_builtin = struct { struct te_builtin {
const wchar_t *name; const wchar_t *name;
void *address; void *address;
int type; int type;
}; };
using state = struct { struct state {
explicit state(const wchar_t *expr) : start{expr}, next{expr} { next_token(); }
te_expr_t *expr();
union { union {
double value; double value;
void *function; void *function;
@ -91,28 +94,25 @@ using state = struct {
const wchar_t *start; const wchar_t *start;
const wchar_t *next; const wchar_t *next;
int type; int type;
te_error_type_t error; te_error_type_t error{TE_ERROR_NONE};
private:
void next_token();
te_expr_t *power();
te_expr_t *base();
te_expr_t *factor();
te_expr_t *term();
}; };
/* Parses the input expression. */
/* Returns NULL on error. */
te_expr *te_compile(const wchar_t *expression, te_error_t *error);
/* Evaluates the expression. */
double te_eval(const te_expr *n);
/* Frees the expression. */
/* This is safe to call on NULL pointers. */
void te_free(te_expr *n);
// TODO: That move there? Ouch. Replace with a proper class with a constructor. // TODO: That move there? Ouch. Replace with a proper class with a constructor.
#define NEW_EXPR(type, ...) new_expr((type), std::move((const te_expr *[]){__VA_ARGS__})) #define NEW_EXPR(type, ...) new_expr((type), std::move((const te_expr_t *[]){__VA_ARGS__}))
static te_expr *new_expr(const int type, const te_expr *parameters[]) { static te_expr_t *new_expr(const int type, const te_expr_t *parameters[]) {
const int arity = get_arity(type); const int arity = get_arity(type);
const int psize = sizeof(te_expr *) * arity; const int psize = sizeof(te_expr_t *) * arity;
const int size = sizeof(te_expr) + psize; const int size = sizeof(te_expr_t) + psize;
auto ret = static_cast<te_expr *>(malloc(size)); auto ret = static_cast<te_expr_t *>(malloc(size));
// This sets float to 0, which depends on the implementation. // This sets float to 0, which depends on the implementation.
// We rely on IEEE-754 floats anyway, so it's okay. // We rely on IEEE-754 floats anyway, so it's okay.
std::memset(ret, 0, size); std::memset(ret, 0, size);
@ -123,7 +123,11 @@ static te_expr *new_expr(const int type, const te_expr *parameters[]) {
return ret; return ret;
} }
static void te_free_parameters(te_expr *n) { /* Frees the expression. */
/* This is safe to call on NULL pointers. */
static void te_free(te_expr_t *n);
static void te_free_parameters(te_expr_t *n) {
if (!n) return; if (!n) return;
int arity = get_arity(n->type); int arity = get_arity(n->type);
// Free all parameters from the back to the front. // Free all parameters from the back to the front.
@ -133,7 +137,7 @@ static void te_free_parameters(te_expr *n) {
} }
} }
void te_free(te_expr *n) { static void te_free(te_expr_t *n) {
if (!n) return; if (!n) return;
te_free_parameters(n); te_free_parameters(n);
free(n); free(n);
@ -261,86 +265,84 @@ static constexpr double divide(double a, double b) {
static constexpr double negate(double a) { return -a; } static constexpr double negate(double a) { return -a; }
static void next_token(state *s) { void state::next_token() {
s->type = TOK_NULL; type = TOK_NULL;
do { do {
if (!*s->next) { if (!*next) {
s->type = TOK_END; type = TOK_END;
return; return;
} }
/* Try reading a number. */ /* Try reading a number. */
if ((s->next[0] >= '0' && s->next[0] <= '9') || s->next[0] == '.') { if ((next[0] >= '0' && next[0] <= '9') || next[0] == '.') {
s->value = fish_wcstod(s->next, const_cast<wchar_t **>(&s->next)); value = fish_wcstod(next, const_cast<wchar_t **>(&next));
s->type = TOK_NUMBER; type = TOK_NUMBER;
} else { } else {
/* Look for a function call. */ /* Look for a function call. */
// But not when it's an "x" followed by whitespace // But not when it's an "x" followed by whitespace
// - that's the alternative multiplication operator. // - that's the alternative multiplication operator.
if (s->next[0] >= 'a' && s->next[0] <= 'z' && if (next[0] >= 'a' && next[0] <= 'z' && !(next[0] == 'x' && isspace(next[1]))) {
!(s->next[0] == 'x' && isspace(s->next[1]))) {
const wchar_t *start; const wchar_t *start;
start = s->next; start = next;
while ((s->next[0] >= 'a' && s->next[0] <= 'z') || while ((next[0] >= 'a' && next[0] <= 'z') || (next[0] >= '0' && next[0] <= '9') ||
(s->next[0] >= '0' && s->next[0] <= '9') || (s->next[0] == '_')) (next[0] == '_'))
s->next++; next++;
const te_builtin *var = find_builtin(start, s->next - start); const te_builtin *var = find_builtin(start, next - start);
if (var) { if (var) {
switch (var->type) { switch (var->type) {
case TE_FUNCTION0: case TE_FUNCTION0:
case TE_FUNCTION1: case TE_FUNCTION1:
case TE_FUNCTION2: case TE_FUNCTION2:
case TE_FUNCTION3: type = var->type;
s->type = var->type; function = var->address;
s->function = var->address;
break; break;
} }
} else if (s->type != TOK_ERROR || s->error == TE_ERROR_UNKNOWN) { } else if (type != TOK_ERROR || error == TE_ERROR_UNKNOWN) {
// Our error is more specific, so it takes precedence. // Our error is more specific, so it takes precedence.
s->type = TOK_ERROR; type = TOK_ERROR;
s->error = TE_ERROR_UNKNOWN_FUNCTION; error = TE_ERROR_UNKNOWN_FUNCTION;
} }
} else { } else {
/* Look for an operator or special character. */ /* Look for an operator or special character. */
switch (s->next++[0]) { switch (next++[0]) {
// The "te_fun2" casts are necessary to pick the right overload. // The "te_fun2" casts are necessary to pick the right overload.
case '+': case '+':
s->type = TOK_INFIX; type = TOK_INFIX;
s->function = reinterpret_cast<void *>(static_cast<te_fun2>(add)); function = reinterpret_cast<void *>(static_cast<te_fun2>(add));
break; break;
case '-': case '-':
s->type = TOK_INFIX; type = TOK_INFIX;
s->function = reinterpret_cast<void *>(static_cast<te_fun2>(sub)); function = reinterpret_cast<void *>(static_cast<te_fun2>(sub));
break; break;
case 'x': case 'x':
case '*': case '*':
// We've already checked for whitespace above. // We've already checked for whitespace above.
s->type = TOK_INFIX; type = TOK_INFIX;
s->function = reinterpret_cast<void *>(static_cast<te_fun2>(mul)); function = reinterpret_cast<void *>(static_cast<te_fun2>(mul));
break; break;
case '/': case '/':
s->type = TOK_INFIX; type = TOK_INFIX;
s->function = reinterpret_cast<void *>(static_cast<te_fun2>(divide)); function = reinterpret_cast<void *>(static_cast<te_fun2>(divide));
break; break;
case '^': case '^':
s->type = TOK_INFIX; type = TOK_INFIX;
s->function = reinterpret_cast<void *>(static_cast<te_fun2>(pow)); function = reinterpret_cast<void *>(static_cast<te_fun2>(pow));
break; break;
case '%': case '%':
s->type = TOK_INFIX; type = TOK_INFIX;
s->function = reinterpret_cast<void *>(static_cast<te_fun2>(fmod)); function = reinterpret_cast<void *>(static_cast<te_fun2>(fmod));
break; break;
case '(': case '(':
s->type = TOK_OPEN; type = TOK_OPEN;
break; break;
case ')': case ')':
s->type = TOK_CLOSE; type = TOK_CLOSE;
break; break;
case ',': case ',':
s->type = TOK_SEP; type = TOK_SEP;
break; break;
case ' ': case ' ':
case '\t': case '\t':
@ -353,126 +355,122 @@ static void next_token(state *s) {
case '&': case '&':
case '|': case '|':
case '!': case '!':
s->type = TOK_ERROR; type = TOK_ERROR;
s->error = TE_ERROR_LOGICAL_OPERATOR; error = TE_ERROR_LOGICAL_OPERATOR;
break; break;
default: default:
s->type = TOK_ERROR; type = TOK_ERROR;
s->error = TE_ERROR_MISSING_OPERATOR; error = TE_ERROR_MISSING_OPERATOR;
break; break;
} }
} }
} }
} while (s->type == TOK_NULL); } while (type == TOK_NULL);
} }
static te_expr *expr(state *s); te_expr_t *state::base() {
static te_expr *power(state *s);
static te_expr *base(state *s) {
/* <base> = <constant> | <function-0> {"(" ")"} | <function-1> <power> | /* <base> = <constant> | <function-0> {"(" ")"} | <function-1> <power> |
* <function-X> "(" <expr> {"," <expr>} ")" | "(" <list> ")" */ * <function-X> "(" <expr> {"," <expr>} ")" | "(" <list> ")" */
te_expr *ret; te_expr_t *ret;
int arity; int arity;
auto previous = s->start; auto previous = start;
auto next = s->next; auto next = this->next;
switch (s->type) { switch (type) {
case TOK_NUMBER: case TOK_NUMBER:
ret = new_expr(TE_CONSTANT, nullptr); ret = new_expr(TE_CONSTANT, nullptr);
ret->value = s->value; ret->value = value;
next_token(s); next_token();
if (s->type == TOK_NUMBER || s->type == TE_FUNCTION0) { if (type == TOK_NUMBER || type == TE_FUNCTION0) {
// Two numbers after each other: // Two numbers after each other:
// math '5 2' // math '5 2'
// math '3 pi' // math '3 pi'
// (of course 3 pi could also be interpreted as 3 x pi) // (of course 3 pi could also be interpreted as 3 x pi)
s->type = TOK_ERROR; type = TOK_ERROR;
s->error = TE_ERROR_MISSING_OPERATOR; error = TE_ERROR_MISSING_OPERATOR;
// The error should be given *between* // The error should be given *between*
// the last two tokens. // the last two tokens.
// Since these are two separate numbers there is at least // Since these are two separate numbers there is at least
// one space between. // one space between.
s->start = previous; start = previous;
s->next = next + 1; this->next = next + 1;
} }
break; break;
case TE_FUNCTION0: case TE_FUNCTION0:
ret = new_expr(s->type, nullptr); ret = new_expr(type, nullptr);
ret->function = s->function; ret->fun0 = reinterpret_cast<te_fun0>(function);
next_token(s); next_token();
if (s->type == TOK_OPEN) { if (type == TOK_OPEN) {
next_token(s); next_token();
if (s->type == TOK_CLOSE) { if (type == TOK_CLOSE) {
next_token(s); next_token();
} else if (s->type != TOK_ERROR || s->error == TE_ERROR_UNKNOWN) { } else if (type != TOK_ERROR || error == TE_ERROR_UNKNOWN) {
s->type = TOK_ERROR; type = TOK_ERROR;
s->error = TE_ERROR_MISSING_CLOSING_PAREN; error = TE_ERROR_MISSING_CLOSING_PAREN;
} }
} }
break; break;
case TE_FUNCTION1: case TE_FUNCTION1:
case TE_FUNCTION2: case TE_FUNCTION2: {
case TE_FUNCTION3: { arity = get_arity(type);
arity = get_arity(s->type);
ret = new_expr(s->type, nullptr); ret = new_expr(type, nullptr);
ret->function = s->function; ret->fun0 = reinterpret_cast<te_fun0>(function);
next_token(s); next_token();
bool have_open = false; bool have_open = false;
if (s->type == TOK_OPEN) { if (type == TOK_OPEN) {
// If we *have* an opening parenthesis, // If we *have* an opening parenthesis,
// we need to consume it and // we need to consume it and
// expect a closing one. // expect a closing one.
have_open = true; have_open = true;
next_token(s); next_token();
} }
int i; int i;
for (i = 0; i < arity; i++) { for (i = 0; i < arity; i++) {
ret->parameters[i] = expr(s); ret->parameters[i] = expr();
if (s->type != TOK_SEP) { if (type != TOK_SEP) {
break; break;
} }
next_token(s); next_token();
} }
if (!have_open && i == arity - 1) { if (!have_open && i == arity - 1) {
break; break;
} }
if (have_open && s->type == TOK_CLOSE && i == arity - 1) { if (have_open && type == TOK_CLOSE && i == arity - 1) {
// We have an opening and a closing paren, consume the closing one and done. // We have an opening and a closing paren, consume the closing one and done.
next_token(s); next_token();
} else if (s->type != TOK_ERROR || s->error == TE_ERROR_UNEXPECTED_TOKEN) { } else if (type != TOK_ERROR || error == TE_ERROR_UNEXPECTED_TOKEN) {
// If we had the right number of arguments, we're missing a closing paren. // If we had the right number of arguments, we're missing a closing paren.
if (have_open && i == arity - 1 && s->type != TOK_ERROR) { if (have_open && i == arity - 1 && type != TOK_ERROR) {
s->error = TE_ERROR_MISSING_CLOSING_PAREN; error = TE_ERROR_MISSING_CLOSING_PAREN;
} else { } else {
// Otherwise we complain about the number of arguments *first*, // Otherwise we complain about the number of arguments *first*,
// a closing parenthesis should be more obvious. // a closing parenthesis should be more obvious.
s->error = i < arity ? TE_ERROR_TOO_FEW_ARGS : TE_ERROR_TOO_MANY_ARGS; error = i < arity ? TE_ERROR_TOO_FEW_ARGS : TE_ERROR_TOO_MANY_ARGS;
} }
s->type = TOK_ERROR; type = TOK_ERROR;
} }
break; break;
} }
case TOK_OPEN: case TOK_OPEN:
next_token(s); next_token();
ret = expr(s); ret = expr();
if (s->type == TOK_CLOSE) { if (type == TOK_CLOSE) {
next_token(s); next_token();
} else if (s->type != TOK_ERROR && s->type != TOK_END && s->error == TE_ERROR_NONE) { } else if (type != TOK_ERROR && type != TOK_END && error == TE_ERROR_NONE) {
s->type = TOK_ERROR; type = TOK_ERROR;
s->error = TE_ERROR_TOO_MANY_ARGS; error = TE_ERROR_TOO_MANY_ARGS;
} else if (s->type != TOK_ERROR || s->error == TE_ERROR_UNKNOWN) { } else if (type != TOK_ERROR || error == TE_ERROR_UNKNOWN) {
s->type = TOK_ERROR; type = TOK_ERROR;
s->error = TE_ERROR_MISSING_CLOSING_PAREN; error = TE_ERROR_MISSING_CLOSING_PAREN;
} }
break; break;
@ -483,15 +481,15 @@ static te_expr *base(state *s) {
// Instead of introducing another error, just call it // Instead of introducing another error, just call it
// "too few args". // "too few args".
ret = new_expr(0, nullptr); ret = new_expr(0, nullptr);
s->type = TOK_ERROR; type = TOK_ERROR;
s->error = TE_ERROR_TOO_FEW_ARGS; error = TE_ERROR_TOO_FEW_ARGS;
ret->value = NAN; ret->value = NAN;
break; break;
default: default:
ret = new_expr(0, nullptr); ret = new_expr(0, nullptr);
if (s->type != TOK_ERROR || s->error == TE_ERROR_UNKNOWN) { if (type != TOK_ERROR || error == TE_ERROR_UNKNOWN) {
s->type = TOK_ERROR; type = TOK_ERROR;
s->error = TE_ERROR_UNEXPECTED_TOKEN; error = TE_ERROR_UNEXPECTED_TOKEN;
} }
ret->value = NAN; ret->value = NAN;
break; break;
@ -500,46 +498,45 @@ static te_expr *base(state *s) {
return ret; return ret;
} }
static te_expr *power(state *s) { te_expr_t *state::power() {
/* <power> = {("-" | "+")} <base> */ /* <power> = {("-" | "+")} <base> */
int sign = 1; int sign = 1;
while (s->type == TOK_INFIX && (s->function == add || s->function == sub)) { while (type == TOK_INFIX && (function == add || function == sub)) {
if (s->function == sub) sign = -sign; if (function == sub) sign = -sign;
next_token(s); next_token();
} }
te_expr *ret; te_expr_t *ret;
if (sign == 1) { if (sign == 1) {
ret = base(s); ret = base();
} else { } else {
ret = NEW_EXPR(TE_FUNCTION1, base(s)); ret = NEW_EXPR(TE_FUNCTION1, base());
ret->function = reinterpret_cast<void *>(negate); ret->fun1 = negate;
} }
return ret; return ret;
} }
static te_expr *factor(state *s) { te_expr_t *state::factor() {
/* <factor> = <power> {"^" <power>} */ /* <factor> = <power> {"^" <power>} */
te_expr *ret = power(s); te_expr_t *ret = power();
te_expr *insertion = nullptr; te_expr_t *insertion = nullptr;
while (s->type == TOK_INFIX && while (type == TOK_INFIX && (function == reinterpret_cast<void *>(static_cast<te_fun2>(pow)))) {
(s->function == reinterpret_cast<void *>(static_cast<te_fun2>(pow)))) { auto t = reinterpret_cast<te_fun2>(function);
auto t = reinterpret_cast<te_fun2>(s->function); next_token();
next_token(s);
if (insertion) { if (insertion) {
/* Make exponentiation go right-to-left. */ /* Make exponentiation go right-to-left. */
te_expr *insert = NEW_EXPR(TE_FUNCTION2, insertion->parameters[1], power(s)); te_expr_t *insert = NEW_EXPR(TE_FUNCTION2, insertion->parameters[1], power());
insert->function = reinterpret_cast<void *>(t); insert->fun2 = t;
insertion->parameters[1] = insert; insertion->parameters[1] = insert;
insertion = insert; insertion = insert;
} else { } else {
ret = NEW_EXPR(TE_FUNCTION2, ret, power(s)); ret = NEW_EXPR(TE_FUNCTION2, ret, power());
ret->function = reinterpret_cast<void *>(t); ret->fun2 = t;
insertion = ret; insertion = ret;
} }
} }
@ -547,63 +544,60 @@ static te_expr *factor(state *s) {
return ret; return ret;
} }
static te_expr *term(state *s) { te_expr_t *state::term() {
/* <term> = <factor> {("*" | "/" | "%") <factor>} */ /* <term> = <factor> {("*" | "/" | "%") <factor>} */
te_expr *ret = factor(s); te_expr_t *ret = factor();
while (s->type == TOK_INFIX && while (type == TOK_INFIX &&
(s->function == reinterpret_cast<void *>(static_cast<te_fun2>(mul)) || (function == reinterpret_cast<void *>(static_cast<te_fun2>(mul)) ||
s->function == reinterpret_cast<void *>(static_cast<te_fun2>(divide)) || function == reinterpret_cast<void *>(static_cast<te_fun2>(divide)) ||
s->function == reinterpret_cast<void *>(static_cast<te_fun2>(fmod)))) { function == reinterpret_cast<void *>(static_cast<te_fun2>(fmod)))) {
auto t = reinterpret_cast<te_fun2>(s->function); auto t = reinterpret_cast<te_fun2>(function);
next_token(s); next_token();
ret = NEW_EXPR(TE_FUNCTION2, ret, factor(s)); ret = NEW_EXPR(TE_FUNCTION2, ret, factor());
ret->function = reinterpret_cast<void *>(t); ret->fun2 = t;
} }
return ret; return ret;
} }
static te_expr *expr(state *s) { te_expr_t *state::expr() {
/* <expr> = <term> {("+" | "-") <term>} */ /* <expr> = <term> {("+" | "-") <term>} */
te_expr *ret = term(s); te_expr_t *ret = term();
while (s->type == TOK_INFIX && (s->function == add || s->function == sub)) { while (type == TOK_INFIX && (function == add || function == sub)) {
auto t = reinterpret_cast<te_fun2>(s->function); auto t = reinterpret_cast<te_fun2>(function);
next_token(s); next_token();
ret = NEW_EXPR(TE_FUNCTION2, ret, term(s)); ret = NEW_EXPR(TE_FUNCTION2, ret, term());
ret->function = reinterpret_cast<void *>(t); ret->fun2 = t;
} }
return ret; return ret;
} }
#define TE_FUN(...) ((double (*)(__VA_ARGS__))n->function)
#define M(e) te_eval(n->parameters[e]) #define M(e) te_eval(n->parameters[e])
double te_eval(const te_expr *n) { /* Evaluates the expression. */
static double te_eval(const te_expr_t *n) {
if (!n) return NAN; if (!n) return NAN;
switch (n->type) { switch (n->type) {
case TE_CONSTANT: case TE_CONSTANT:
return n->value; return n->value;
case TE_FUNCTION0: case TE_FUNCTION0:
return TE_FUN(void)(); return n->fun0();
case TE_FUNCTION1: case TE_FUNCTION1:
return TE_FUN(double)(M(0)); return n->fun1(M(0));
case TE_FUNCTION2: case TE_FUNCTION2:
return TE_FUN(double, double)(M(0), M(1)); return n->fun2(M(0), M(1));
case TE_FUNCTION3:
return TE_FUN(double, double, double)(M(0), M(1), M(2));
default: default:
return NAN; return NAN;
} }
} }
#undef TE_FUN
#undef M #undef M
static void optimize(te_expr *n) { static void optimize(te_expr_t *n) {
/* Evaluates as much as possible. */ /* Evaluates as much as possible. */
if (!n || n->type == TE_CONSTANT) return; if (!n || n->type == TE_CONSTANT) return;
@ -623,13 +617,12 @@ static void optimize(te_expr *n) {
} }
} }
te_expr *te_compile(const wchar_t *expression, te_error_t *error) { /* Parses the input expression. */
state s; /* Returns NULL on error. */
s.start = s.next = expression; static te_expr_t *te_compile(const wchar_t *expression, te_error_t *error) {
s.error = TE_ERROR_NONE; state s{expression};
next_token(&s); te_expr_t *root = s.expr();
te_expr *root = expr(&s);
if (s.type != TOK_END) { if (s.type != TOK_END) {
te_free(root); te_free(root);
@ -652,7 +645,7 @@ te_expr *te_compile(const wchar_t *expression, te_error_t *error) {
} }
double te_interp(const wchar_t *expression, te_error_t *error) { double te_interp(const wchar_t *expression, te_error_t *error) {
te_expr *n = te_compile(expression, error); te_expr_t *n = te_compile(expression, error);
double ret; double ret;
if (n) { if (n) {
ret = te_eval(n); ret = te_eval(n);