Improvements to new parser. All functions and completions now parse.

This commit is contained in:
ridiculousfish 2013-07-22 18:26:15 -07:00
parent 77b6b0a9b2
commit 3e3eefc2dc
13 changed files with 1080 additions and 879 deletions

View file

@ -4011,7 +4011,7 @@ struct parse_execution_simulator_t : public parse_execution_visitor_t
break;
default:
break;
break;
}
line.append(L"cmd:");
@ -4025,7 +4025,8 @@ struct parse_execution_simulator_t : public parse_execution_visitor_t
}
}
void visit_function(const exec_function_header_t &function) {
void visit_function(const exec_function_header_t &function)
{
wcstring &line = this->back();
line.append(L"define function: ");
wcstring tmp;
@ -4074,17 +4075,19 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
}
else
{
parse_execution_context_t ctx(parse_tree, src);
parse_execution_simulator_t sim;
sim.context = &ctx;
while (ctx.visit_next_node(&sim))
{
}
stdout_buffer.append(L"Simulating execution:\n");
for (size_t i=0; i < sim.result.size(); i++)
{
stdout_buffer.append(sim.result.at(i));
stdout_buffer.push_back(L'\n');
if (0) {
parse_execution_context_t ctx(parse_tree, src);
parse_execution_simulator_t sim;
sim.context = &ctx;
while (ctx.visit_next_node(&sim))
{
}
stdout_buffer.append(L"Simulating execution:\n");
for (size_t i=0; i < sim.result.size(); i++)
{
stdout_buffer.append(sim.result.at(i));
stdout_buffer.push_back(L'\n');
}
}
}
}

View file

@ -545,8 +545,8 @@ static bool can_use_posix_spawn_for_job(const job_t *job, const process_t *proce
const shared_ptr<const io_data_t> &io = job->io.at(idx);
if (redirection_is_to_real_file(io.get()))
{
result = false;
break;
result = false;
break;
}
}
return result;

View file

@ -465,6 +465,7 @@
D0D2693C159835CA005D9B9C /* fish */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = fish; sourceTree = BUILT_PRODUCTS_DIR; };
D0F3373A1506DE3C00ECEFC0 /* builtin_test.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = builtin_test.cpp; sourceTree = "<group>"; };
D0F5E28415A7A32D00315DFF /* config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = config.h; sourceTree = "<group>"; };
D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_tree_construction.h; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -589,6 +590,7 @@
D0A0850C13B3ACEE0099B651 /* expand.h */,
D0A0853D13B3ACEE0099B651 /* expand.cpp */,
D0C52F361765284C00BFAB82 /* parse_tree.h */,
D0FE8EE6179CA8A5008C9F21 /* parse_tree_construction.h */,
D0C52F351765284C00BFAB82 /* parse_tree.cpp */,
D0C52F341765281F00BFAB82 /* parse_exec.h */,
D0C52F331765281F00BFAB82 /* parse_exec.cpp */,

View file

@ -130,21 +130,21 @@ class parse_exec_t
{
case parse_token_type_string:
// Argument
{
exec_argument_t arg = exec_argument_t();
arg.parse_node_idx = child_idx;
output->arguments.push_back(arg);
}
break;
{
exec_argument_t arg = exec_argument_t();
arg.parse_node_idx = child_idx;
output->arguments.push_back(arg);
}
break;
case parse_token_type_redirection:
// Redirection
{
exec_redirection_t redirect = exec_redirection_t();
redirect.parse_node_idx = child_idx;
output->redirections.push_back(redirect);
}
break;
{
exec_redirection_t redirect = exec_redirection_t();
redirect.parse_node_idx = child_idx;
output->redirections.push_back(redirect);
}
break;
default:
PARSER_DIE();
@ -345,7 +345,7 @@ class parse_exec_t
void enter_parse_node(size_t idx);
void run_top_node(void);
public:
public:
void get_node_string(node_offset_t idx, wcstring *output) const
{
@ -458,7 +458,7 @@ void parse_exec_t::run_top_node()
case symbol_if_statement:
{
PARSE_ASSERT(parse_node.child_count == 3);
PARSE_ASSERT(parse_node.child_count == 4);
pop_push(0, 2);
break;
}
@ -528,7 +528,7 @@ void parse_exec_t::run_top_node()
case symbol_plain_statement:
case symbol_arguments_or_redirections_list:
case symbol_argument_or_redirection:
fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx);
fprintf(stderr, "Unexpected token type %ls at index %ld. This should have been handled by the parent.\n", token_type_description(parse_node.type).c_str(), exec_node.parse_node_idx);
PARSER_DIE();
break;

View file

@ -14,7 +14,7 @@ class parse_execution_context_t
{
parse_exec_t *ctx; //owned
public:
public:
parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s);
~parse_execution_context_t();
@ -143,11 +143,20 @@ struct parse_execution_visitor_t
{
}
virtual bool enter_job_list(void) { return true; }
virtual bool enter_job(void) { return true; }
virtual bool enter_job_list(void)
{
return true;
}
virtual bool enter_job(void)
{
return true;
}
virtual void visit_statement(void) { }
virtual void visit_function(const exec_function_header_t &function) { }
virtual bool enter_block_statement(const exec_block_statement_t &statement) { return true; }
virtual bool enter_block_statement(const exec_block_statement_t &statement)
{
return true;
}
virtual void enter_if_clause(const exec_if_clause_t &statement) { }
virtual void exit_if_clause(const exec_if_clause_t &statement) { }

View file

@ -1,4 +1,4 @@
#include "parse_tree.h"
#include "parse_tree_construction.h"
#include "tokenizer.h"
#include <vector>
@ -14,9 +14,10 @@ wcstring parse_error_t::describe(const wcstring &src) const
// Look for a newline prior to source_start. If we don't find one, start at the beginning of the string; otherwise start one past the newline
size_t newline = src.find_last_of(L'\n', source_start);
fprintf(stderr, "newline: %lu, source_start %lu, source_length %lu\n", newline, source_start, source_length);
if (newline != wcstring::npos)
{
line_start = newline + 1;
line_start = newline;// + 1;
}
size_t line_end = src.find(L'\n', source_start + source_length);
@ -25,6 +26,7 @@ wcstring parse_error_t::describe(const wcstring &src) const
line_end = src.size();
}
assert(line_end >= line_start);
fprintf(stderr, "source start: %lu, line start %lu\n", source_start, line_start);
assert(source_start >= line_start);
// Append the line of text
@ -43,68 +45,115 @@ wcstring token_type_description(parse_token_type_t type)
{
switch (type)
{
case token_type_invalid: return L"invalid";
case token_type_invalid:
return L"invalid";
case symbol_job_list: return L"job_list";
case symbol_job: return L"job";
case symbol_job_continuation: return L"job_continuation";
case symbol_job_list:
return L"job_list";
case symbol_job:
return L"job";
case symbol_job_continuation:
return L"job_continuation";
case symbol_statement: return L"statement";
case symbol_block_statement: return L"block_statement";
case symbol_block_header: return L"block_header";
case symbol_for_header: return L"for_header";
case symbol_while_header: return L"while_header";
case symbol_begin_header: return L"begin_header";
case symbol_function_header: return L"function_header";
case symbol_statement:
return L"statement";
case symbol_block_statement:
return L"block_statement";
case symbol_block_header:
return L"block_header";
case symbol_for_header:
return L"for_header";
case symbol_while_header:
return L"while_header";
case symbol_begin_header:
return L"begin_header";
case symbol_function_header:
return L"function_header";
case symbol_if_statement: return L"if_statement";
case symbol_if_clause: return L"if_clause";
case symbol_else_clause: return L"else_clause";
case symbol_else_continuation: return L"else_continuation";
case symbol_if_statement:
return L"if_statement";
case symbol_if_clause:
return L"if_clause";
case symbol_else_clause:
return L"else_clause";
case symbol_else_continuation:
return L"else_continuation";
case symbol_switch_statement: return L"switch_statement";
case symbol_case_item_list: return L"case_item_list";
case symbol_case_item: return L"case_item";
case symbol_switch_statement:
return L"switch_statement";
case symbol_case_item_list:
return L"case_item_list";
case symbol_case_item:
return L"case_item";
case symbol_argument_list_nonempty: return L"argument_list_nonempty";
case symbol_argument_list: return L"argument_list";
case symbol_argument_list_nonempty:
return L"argument_list_nonempty";
case symbol_argument_list:
return L"argument_list";
case symbol_boolean_statement: return L"boolean_statement";
case symbol_decorated_statement: return L"decorated_statement";
case symbol_plain_statement: return L"plain_statement";
case symbol_arguments_or_redirections_list: return L"arguments_or_redirections_list";
case symbol_argument_or_redirection: return L"argument_or_redirection";
case symbol_boolean_statement:
return L"boolean_statement";
case symbol_decorated_statement:
return L"decorated_statement";
case symbol_plain_statement:
return L"plain_statement";
case symbol_arguments_or_redirections_list:
return L"arguments_or_redirections_list";
case symbol_argument_or_redirection:
return L"argument_or_redirection";
case parse_token_type_string: return L"token_string";
case parse_token_type_pipe: return L"token_pipe";
case parse_token_type_redirection: return L"token_redirection";
case parse_token_background: return L"token_background";
case parse_token_type_end: return L"token_end";
case parse_token_type_terminate: return L"token_terminate";
default: return format_string(L"Unknown token type %ld", static_cast<long>(type));
case parse_token_type_string:
return L"token_string";
case parse_token_type_pipe:
return L"token_pipe";
case parse_token_type_redirection:
return L"token_redirection";
case parse_token_type_background:
return L"token_background";
case parse_token_type_end:
return L"token_end";
case parse_token_type_terminate:
return L"token_terminate";
case symbol_optional_background:
return L"optional_background";
}
return format_string(L"Unknown token type %ld", static_cast<long>(type));
}
wcstring keyword_description(parse_keyword_t k)
{
switch (k)
{
case parse_keyword_none: return L"none";
case parse_keyword_if: return L"if";
case parse_keyword_else: return L"else";
case parse_keyword_for: return L"for";
case parse_keyword_in: return L"in";
case parse_keyword_while: return L"while";
case parse_keyword_begin: return L"begin";
case parse_keyword_function: return L"function";
case parse_keyword_switch: return L"switch";
case parse_keyword_end: return L"end";
case parse_keyword_and: return L"and";
case parse_keyword_or: return L"or";
case parse_keyword_not: return L"not";
case parse_keyword_command: return L"command";
case parse_keyword_builtin: return L"builtin";
case parse_keyword_none:
return L"none";
case parse_keyword_if:
return L"if";
case parse_keyword_else:
return L"else";
case parse_keyword_for:
return L"for";
case parse_keyword_in:
return L"in";
case parse_keyword_while:
return L"while";
case parse_keyword_begin:
return L"begin";
case parse_keyword_function:
return L"function";
case parse_keyword_switch:
return L"switch";
case parse_keyword_end:
return L"end";
case parse_keyword_and:
return L"and";
case parse_keyword_or:
return L"or";
case parse_keyword_not:
return L"not";
case parse_keyword_command:
return L"command";
case parse_keyword_builtin:
return L"builtin";
default:
return format_string(L"Unknown keyword type %ld", static_cast<long>(k));
}
@ -157,9 +206,18 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
break;
case TOK_BACKGROUND:
result.type = parse_token_background;
result.type = parse_token_type_background;
break;
case TOK_REDIRECT_OUT:
case TOK_REDIRECT_APPEND:
case TOK_REDIRECT_IN:
case TOK_REDIRECT_FD:
case TOK_REDIRECT_NOCLOB:
result.type = parse_token_type_redirection;
break;
default:
fprintf(stderr, "Bad token type %d passed to %s\n", (int)tokenizer_token_type, __FUNCTION__);
assert(0);
@ -266,12 +324,8 @@ class parse_ll_t
void accept_token_job_list(parse_token_t token);
void accept_token_job(parse_token_t token);
void accept_token_job_continuation(parse_token_t token);
void accept_token_statement(parse_token_t token);
void accept_token_block_header(parse_token_t token);
void accept_token_else_clause(parse_token_t token);
void accept_token_else_continuation(parse_token_t token);
void accept_token_boolean_statement(parse_token_t token);
void accept_token_decorated_statement(parse_token_t token);
void accept_token_plain_statement(parse_token_t token);
void accept_token_argument_list(parse_token_t token);
void accept_token_arguments_or_redirections_list(parse_token_t token);
@ -281,6 +335,7 @@ class parse_ll_t
void token_unhandled(parse_token_t token, const char *function);
void parse_error(const wchar_t *expected, parse_token_t token);
void parse_error(parse_token_t token, const wchar_t *format, ...);
void append_error_callout(wcstring &error_message, parse_token_t token);
void dump_stack(void) const;
@ -324,7 +379,7 @@ class parse_ll_t
{
// Logging?
if (1)
if (0)
{
fprintf(stderr, "Pop %ls (%lu)\n", token_type_description(symbol_stack.back().type).c_str(), symbol_stack.size());
if (tok5.type != token_type_invalid) fprintf(stderr, "Push %ls\n", tok5.describe().c_str());
@ -362,29 +417,67 @@ class parse_ll_t
}
template<typename T>
inline void symbol_stack_pop_push2()
inline void symbol_stack_pop_push2(typename T::magic_seq_type_t x = 0)
{
symbol_stack_pop_push_int(T::t0::get_token(), T::t1::get_token(), T::t2::get_token(), T::t3::get_token(), T::t4::get_token());
}
template<typename T>
inline void symbol_stack_pop_push_production(int which)
inline void symbol_stack_pop_push2(typename T::magic_symbol_type_t x = 0)
{
symbol_stack_pop_push_int(T::get_token());
}
// Singular. Sole productions are always of type Seq.
// Handles symbols that expose exactly one expansion as T::sole_production.
// The defaulted second parameter exists only to select this overload: its type names
// T::sole_production::magic_seq_type_t, so the overload drops out of consideration for any T
// whose sole production does not declare that member type.
// Neither `tok` nor `magic` is read in the body.
template<typename T>
inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_seq_type_t magic=0)
{
typedef typename T::sole_production seq;
// Pass all five element slots (t0..t4) of the sequence to symbol_stack_pop_push_int.
symbol_stack_pop_push_int(seq::t0::get_token(), seq::t1::get_token(), seq::t2::get_token(), seq::t3::get_token(), seq::t4::get_token());
}
// Plural productions, of type Or.
template<typename T>
inline void symbol_stack_produce(parse_token_t tok, typename T::productions::magic_or_type_t magic=0)
{
typedef typename T::productions ors;
int which = T::production(tok.type, tok.keyword);
switch (which)
{
case 0: symbol_stack_pop_push2<typename T::p0>(); break;
case 1: symbol_stack_pop_push2<typename T::p1>(); break;
case 2: symbol_stack_pop_push2<typename T::p2>(); break;
case 3: symbol_stack_pop_push2<typename T::p3>(); break;
case 4: symbol_stack_pop_push2<typename T::p4>(); break;
case 0:
symbol_stack_pop_push2<typename ors::p0>();
break;
case 1:
symbol_stack_pop_push2<typename ors::p1>();
break;
case 2:
symbol_stack_pop_push2<typename ors::p2>();
break;
case 3:
symbol_stack_pop_push2<typename ors::p3>();
break;
case 4:
symbol_stack_pop_push2<typename ors::p4>();
break;
case NO_PRODUCTION:
parse_error(tok, L"Failed to produce with stack top '%ls' for token '%ls'\n", symbol_stack.back().describe().c_str(), tok.describe().c_str());
break;
default:
parse_error(tok, L"Unexpected production %d for token %ls\n", which, tok.describe().c_str());
break;
}
}
// Non-sequence basic productions
template<typename T>
inline void symbol_stack_produce(parse_token_t tok)
inline void symbol_stack_produce(parse_token_t tok, typename T::sole_production::magic_symbol_type_t magic=0)
{
symbol_stack_pop_push_production<T>(T::production(tok.type, tok.keyword));
symbol_stack_pop_push_int(T::sole_production::get_token());
}
};
void parse_ll_t::dump_stack(void) const
@ -422,9 +515,31 @@ void parse_ll_t::token_unhandled(parse_token_t token, const char *function)
{
fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function);
this->dump_stack();
PARSER_DIE();
parse_error_t err;
err.text = format_string(L"Unhandled token with type %ls in function %s", token_type_description(token.type).c_str(), function);
err.source_start = token.source_start;
err.source_length = token.source_length;
this->errors.push_back(err);
this->fatal_errored = true;
}
/* Records a fatal parse error whose message is built printf-style.

   token: supplies the source range (source_start / source_length) stored with the error.
   fmt:   wide format string; the remaining arguments are its substitutions.

   The symbol stack is dumped first, then the error is appended to `errors` and
   `fatal_errored` is set. */
void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
{
    this->dump_stack();

    parse_error_t failure;

    // Where in the source the offending token lives.
    failure.source_start = token.source_start;
    failure.source_length = token.source_length;

    // Expand the caller's format string into the error text.
    va_list args;
    va_start(args, fmt);
    failure.text = vformat_string(fmt, args);
    va_end(args);

    this->errors.push_back(failure);
    this->fatal_errored = true;
}
void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
{
wcstring desc = token_type_description(token.type);
@ -436,172 +551,6 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
fatal_errored = true;
}
/* Chooses which job_list production to push for the incoming token.

   Production indices follow the order of alternatives in parse_symbols::job_list:
     0 - empty (ends the list)
     1 - job followed by job_list
     2 - an end token followed by job_list (empty line)
   Token types not listed below are reported through token_unhandled. */
void parse_ll_t::accept_token_job_list(parse_token_t token)
{
    PARSE_ASSERT(stack_top_type() == symbol_job_list);

    // Stays negative when the token type is not one this symbol knows about.
    int which = -1;
    switch (token.type)
    {
        case parse_token_type_terminate:
            // No more commands, just transition to empty
            which = 0;
            break;

        case parse_token_type_end:
            // Empty line
            which = 2;
            break;

        case parse_token_type_pipe:
        case parse_token_type_redirection:
        case parse_token_background:
            which = 1;
            break;

        case parse_token_type_string:
        {
            // 'end' and 'else' close the current job list; any other string starts a job
            const bool closes_list = (token.keyword == parse_keyword_end || token.keyword == parse_keyword_else);
            which = closes_list ? 0 : 1;
            break;
        }

        default:
            break;
    }

    if (which < 0)
    {
        token_unhandled(token, __FUNCTION__);
        return;
    }
    symbol_stack_pop_push_production<job_list>(which);
}
/* Chooses the job_continuation production for the incoming token:
   a pipe continues the job (production 1); anything else ends it (production 0, empty). */
void parse_ll_t::accept_token_job_continuation(parse_token_t token)
{
    PARSE_ASSERT(stack_top_type() == symbol_job_continuation);

    const bool is_pipe = (token.type == parse_token_type_pipe);
    if (is_pipe)
    {
        // Pipe, continuation
        symbol_stack_pop_push_production<job_continuation>(1);
    }
    else
    {
        // Not a pipe, no job continuation
        symbol_stack_pop_push_production<job_continuation>(0);
    }
}
/* Chooses which statement production to push for the incoming token.

   Production indices follow the order of alternatives in parse_symbols::statement:
     0 - boolean_statement, 1 - block_statement, 2 - if_statement,
     3 - switch_statement,  4 - decorated_statement

   Non-string tokens never push a production: pipe, redirection, background and terminate
   raise a parse error ("statement" expected); every other type goes to token_unhandled. */
void parse_ll_t::accept_token_statement(parse_token_t token)
{
    PARSE_ASSERT(stack_top_type() == symbol_statement);

    // Deal with everything that is not a string token up front.
    if (token.type != parse_token_type_string)
    {
        const bool cannot_begin_statement =
            token.type == parse_token_type_pipe ||
            token.type == parse_token_type_redirection ||
            token.type == parse_token_background ||
            token.type == parse_token_type_terminate;

        if (cannot_begin_statement)
        {
            parse_error(L"statement", token);
        }
        else
        {
            token_unhandled(token, __FUNCTION__);
        }
        return;
    }

    // String token: the keyword decides. `which` stays negative when nothing is pushed.
    int which = -1;
    switch (token.keyword)
    {
        case parse_keyword_and:
        case parse_keyword_or:
        case parse_keyword_not:
            which = 0;
            break;

        case parse_keyword_for:
        case parse_keyword_while:
        case parse_keyword_function:
        case parse_keyword_begin:
            which = 1;
            break;

        case parse_keyword_if:
            which = 2;
            break;

        case parse_keyword_switch:
            which = 3;
            break;

        // 'in' is only special within a for_header
        case parse_keyword_in:
        case parse_keyword_none:
        case parse_keyword_command:
        case parse_keyword_builtin:
        case parse_keyword_case:
            which = 4;
            break;

        case parse_keyword_else:
            // Just drop the statement symbol; no production is pushed
            symbol_stack_pop();
            break;

        case parse_keyword_end:
            PARSER_DIE(); //todo
            break;
    }

    if (which >= 0)
    {
        symbol_stack_pop_push_production<statement>(which);
    }
}
/* Chooses which block_header production to push for the incoming token.

   Production indices follow the order of alternatives in parse_symbols::block_header:
     0 - for_header, 1 - while_header, 2 - function_header, 3 - begin_header

   Only string tokens carrying one of those keywords are accepted. 'else' hits PARSER_DIE
   (still a todo); everything else is reported through token_unhandled. */
void parse_ll_t::accept_token_block_header(parse_token_t token)
{
    PARSE_ASSERT(stack_top_type() == symbol_block_header);

    if (token.type != parse_token_type_string)
    {
        token_unhandled(token, __FUNCTION__);
        return;
    }

    // Stays negative when the keyword does not open a block.
    int which = -1;
    switch (token.keyword)
    {
        case parse_keyword_for:
            which = 0;
            break;
        case parse_keyword_while:
            which = 1;
            break;
        case parse_keyword_function:
            which = 2;
            break;
        case parse_keyword_begin:
            which = 3;
            break;

        case parse_keyword_else:
            PARSER_DIE(); //todo
            return;

        default:
            break;
    }

    if (which < 0)
    {
        token_unhandled(token, __FUNCTION__);
        return;
    }
    symbol_stack_pop_push_production<block_header>(which);
}
void parse_ll_t::accept_token_else_clause(parse_token_t token)
{
PARSE_ASSERT(stack_top_type() == symbol_else_clause);
@ -614,25 +563,6 @@ void parse_ll_t::accept_token_else_continuation(parse_token_t token)
symbol_stack_produce<else_continuation>(token);
}
/* Accepts a token while symbol_boolean_statement is on top of the symbol stack:
   tags the top node with the token's keyword, then pushes the boolean_statement
   production selected for that token. */
void parse_ll_t::accept_token_boolean_statement(parse_token_t token)
{
PARSE_ASSERT(stack_top_type() == symbol_boolean_statement);
// NOTE(review): the tag is set before producing, presumably because producing changes
// which node is on top - confirm against top_node_set_tag / symbol_stack_produce.
top_node_set_tag(token.keyword);
symbol_stack_produce<boolean_statement>(token);
}
/* Accepts a token while symbol_decorated_statement is on top of the symbol stack:
   tags the top node with the token's keyword, then pushes the decorated_statement
   production selected for that token.

   This used to produce case_item_list, which does not match the symbol asserted to be
   on top of the stack; the production must come from decorated_statement itself. */
void parse_ll_t::accept_token_decorated_statement(parse_token_t token)
{
    PARSE_ASSERT(stack_top_type() == symbol_decorated_statement);
    top_node_set_tag(token.keyword);
    symbol_stack_produce<decorated_statement>(token);
}
/* Accepts a token while symbol_plain_statement is on top of the symbol stack and
   pushes the plain_statement production.

   This used to produce case_item_list, which does not match the symbol asserted to be
   on top of the stack; the production must come from plain_statement itself. */
void parse_ll_t::accept_token_plain_statement(parse_token_t token)
{
    PARSE_ASSERT(stack_top_type() == symbol_plain_statement);
    symbol_stack_produce<plain_statement>(token);
}
void parse_ll_t::accept_token_argument_list(parse_token_t token)
{
@ -704,7 +634,7 @@ bool parse_ll_t::top_node_match_token(parse_token_t token)
void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
{
bool logit = true;
bool logit = false;
if (logit)
{
const wcstring txt = wcstring(src, token.source_start, token.source_length);
@ -727,21 +657,21 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
switch (stack_top_type())
{
/* Symbols */
/* Symbols */
case symbol_job_list:
accept_token_job_list(token);
symbol_stack_produce<job_list>(token);
break;
case symbol_job:
symbol_stack_pop_push2<parse_symbols::job>();
symbol_stack_produce<job>(token);
break;
case symbol_job_continuation:
accept_token_job_continuation(token);
symbol_stack_produce<job_continuation>(token);
break;
case symbol_statement:
accept_token_statement(token);
symbol_stack_produce<statement>(token);
break;
case symbol_if_statement:
@ -797,15 +727,17 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
break;
case symbol_boolean_statement:
accept_token_boolean_statement(token);
top_node_set_tag(token.keyword);
symbol_stack_produce<boolean_statement>(token);
break;
case symbol_decorated_statement:
accept_token_decorated_statement(token);
top_node_set_tag(token.keyword);
symbol_stack_produce<decorated_statement>(token);
break;
case symbol_plain_statement:
accept_token_plain_statement(token);
symbol_stack_produce<plain_statement>(token);
break;
case symbol_argument_list_nonempty:
@ -824,7 +756,11 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
accept_token_argument_or_redirection(token);
break;
/* Tokens */
case symbol_optional_background:
symbol_stack_produce<optional_background>(token);
break;
/* Tokens */
case parse_token_type_string:
consumed = accept_token_string(token);
break;
@ -847,10 +783,12 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
if (tok == TOK_STRING)
{
const struct {
const struct
{
const wchar_t *txt;
parse_keyword_t keyword;
} keywords[] = {
} keywords[] =
{
{L"if", parse_keyword_if},
{L"else", parse_keyword_else},
{L"for", parse_keyword_for},
@ -888,6 +826,7 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_
token_type tok_type = static_cast<token_type>(tok_last_type(&tok));
const wchar_t *tok_txt = tok_last(&tok);
int tok_start = tok_get_pos(&tok);
size_t tok_extent = tok_get_extent(&tok);
if (tok_type == TOK_ERROR)
{
@ -898,12 +837,15 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_
parse_token_t token = parse_token_from_tokenizer_token(tok_type);
token.tokenizer_type = tok_type;
token.source_start = (size_t)tok_start;
token.source_length = wcslen(tok_txt);
token.source_length = tok_extent;
token.keyword = keyword_for_token(tok_type, tok_txt);
this->parser->accept_token(token, str);
if (this->parser->fatal_errored)
break;
}
wcstring result = dump_tree(this->parser->nodes, str);
wcstring result = L"";//dump_tree(this->parser->nodes, str);
fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str());
fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));

View file

@ -41,7 +41,7 @@ class parse_t
{
parse_ll_t * const parser;
public:
public:
parse_t();
bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors);
};
@ -80,11 +80,13 @@ enum parse_token_type_t
symbol_argument_list_nonempty,
symbol_argument_list,
symbol_optional_background,
// Terminal types
parse_token_type_string,
parse_token_type_pipe,
parse_token_type_redirection,
parse_token_background,
parse_token_type_background,
parse_token_type_end,
parse_token_type_terminate,
@ -117,7 +119,7 @@ wcstring keyword_description(parse_keyword_t type);
/** Base class for nodes of a parse tree */
class parse_node_t
{
public:
public:
/* Type of the node */
enum parse_token_type_t type;
@ -154,360 +156,6 @@ class parse_node_tree_t : public std::vector<parse_node_t>
{
};
namespace parse_symbols
{
#define SYMBOL(x) static inline parse_token_type_t get_token() { return x; }
#define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; }
#define NO_PRODUCTION (-1)
template<parse_token_type_t WHICH>
struct Token
{
SYMBOL(WHICH);
typedef Token<WHICH> t0;
typedef Token<token_type_invalid> t1;
typedef Token<token_type_invalid> t2;
typedef Token<token_type_invalid> t3;
typedef Token<token_type_invalid> t4;
typedef Token<token_type_invalid> t5;
};
/* Placeholder */
typedef Token<token_type_invalid> none;
struct EMPTY
{
typedef none t0;
typedef none t1;
typedef none t2;
typedef none t3;
typedef none t4;
typedef none t5;
};
template<typename T0, typename T1, typename T2 = none, typename T3 = none, typename T4 = none, typename T5 = none>
struct Seq
{
typedef T0 t0;
typedef T1 t1;
typedef T2 t2;
typedef T3 t3;
typedef T4 t4;
typedef T5 t5;
};
template<typename P0, typename P1, typename P2 = none, typename P3 = none, typename P4 = none, typename P5 = none>
struct OR
{
typedef P0 p0;
typedef P1 p1;
typedef P2 p2;
typedef P3 p3;
typedef P4 p4;
typedef P5 p5;
};
template<parse_keyword_t WHICH>
struct Keyword
{
static inline parse_keyword_t get_token() { return WHICH; }
};
struct job;
struct statement;
struct job_continuation;
struct boolean_statement;
struct block_statement;
struct if_statement;
struct if_clause;
struct else_clause;
struct else_continuation;
struct switch_statement;
struct decorated_statement;
struct switch_statement;
struct case_item_list;
struct case_item;
struct argument_list_nonempty;
struct argument_list;
struct block_statement;
struct block_header;
struct for_header;
struct while_header;
struct begin_header;
struct function_header;
struct boolean_statement;
struct decorated_statement;
struct plain_statement;
struct arguments_or_redirections_list;
struct argument_or_redirection;
struct redirection;
struct statement_terminator;
/* A job_list is a list of jobs, separated by semicolons or newlines */
struct job_list : OR<
EMPTY,
Seq<job, job_list>,
Seq<Token<parse_token_type_end>, job_list>
>
{
SYMBOL(symbol_job_list)
};
/* A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation */
struct job : Seq<statement, job_continuation>
{
SYMBOL(symbol_job);
};
struct job_continuation : OR<
EMPTY,
Seq<Token<parse_token_type_pipe>, statement, job_continuation>
>
{
SYMBOL(symbol_job_continuation);
};
/* A statement is a normal command, or an if / while / and etc */
struct statement : OR<
boolean_statement,
block_statement,
if_statement,
switch_statement,
decorated_statement
>
{
SYMBOL(symbol_statement);
};
struct if_statement : Seq<if_clause, else_clause, Keyword<parse_keyword_end> >
{
SYMBOL(symbol_if_statement);
PRODUCE(0)
};
struct if_clause : Seq<Keyword<parse_keyword_if>, job, statement_terminator, job_list>
{
SYMBOL(symbol_if_clause);
PRODUCE(0)
};
struct else_clause : OR<
EMPTY,
Keyword<parse_keyword_else>, else_continuation
>
{
SYMBOL(symbol_else_clause);
static int production(parse_token_type_t tok, parse_keyword_t key)
{
switch (key)
{
case parse_keyword_else: return 1;
default: return 0;
}
}
};
struct else_continuation : OR<
Seq<if_clause, else_clause>,
Seq<statement_terminator, job_list>
>
{
SYMBOL(symbol_else_continuation);
static int production(parse_token_type_t tok, parse_keyword_t key)
{
switch (key)
{
case parse_keyword_if: return 0;
default: return 1;
}
}
};
struct switch_statement : Seq<Keyword<parse_keyword_switch>, Token<parse_token_type_string>, statement_terminator, case_item_list, Keyword<parse_keyword_end>
>
{
SYMBOL(symbol_switch_statement);
};
struct case_item_list : OR
<
EMPTY,
case_item, case_item_list
>
{
SYMBOL(symbol_case_item_list);
static int production(parse_token_type_t tok, parse_keyword_t key)
{
switch (key)
{
case parse_keyword_case: return 1;
default: return 0;
}
}
};
struct case_item : Seq<Keyword<parse_keyword_case>, argument_list, statement_terminator, job_list>
{
SYMBOL(symbol_case_item);
};
struct argument_list_nonempty : Seq<Token<parse_token_type_string>, argument_list>
{
SYMBOL(symbol_argument_list_nonempty);
};
struct argument_list : OR<EMPTY, argument_list_nonempty>
{
SYMBOL(symbol_argument_list);
static int production(parse_token_type_t tok, parse_keyword_t key)
{
switch (tok)
{
case parse_token_type_string: return 1;
default: return 0;
}
}
};
struct block_statement : Seq<block_header, statement_terminator, job_list, Keyword<parse_keyword_end>, arguments_or_redirections_list>
{
SYMBOL(symbol_block_statement);
PRODUCE(0)
};
struct block_header : OR<for_header, while_header, function_header, begin_header>
{
SYMBOL(symbol_block_header);
};
struct for_header : Seq<Keyword<parse_keyword_for>, Token<parse_token_type_string>, Keyword<parse_keyword_in>, arguments_or_redirections_list>
{
SYMBOL(symbol_for_header);
};
struct while_header : Seq<Keyword<parse_keyword_while>, statement>
{
SYMBOL(symbol_while_header);
};
struct begin_header : Keyword<parse_keyword_begin>
{
SYMBOL(symbol_begin_header);
};
struct function_header : Keyword<parse_keyword_function>
{
SYMBOL(symbol_function_header);
};
/* A boolean statement is AND or OR or NOT */
struct boolean_statement : OR<
Seq<Keyword<parse_keyword_and>, statement>,
Seq<Keyword<parse_keyword_or>, statement>,
Seq<Keyword<parse_keyword_not>, statement>
>
{
SYMBOL(symbol_boolean_statement);
static int production(parse_token_type_t tok, parse_keyword_t key)
{
switch (key)
{
case parse_keyword_and: return 0;
case parse_keyword_or: return 1;
case parse_keyword_not: return 2;
default: return NO_PRODUCTION;
}
}
};
/* A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" */
struct decorated_statement : OR<
Seq<Keyword<parse_keyword_command>, plain_statement>,
Seq<Keyword<parse_keyword_builtin>, plain_statement>,
plain_statement
>
{
SYMBOL(symbol_decorated_statement);
static int production(parse_token_type_t tok, parse_keyword_t key)
{
switch (key)
{
case parse_keyword_command: return 0;
case parse_keyword_builtin: return 1;
default: return 2;
}
}
};
struct plain_statement : Seq<Token<parse_token_type_string>, arguments_or_redirections_list>
{
SYMBOL(symbol_plain_statement);
static int production(parse_token_type_t tok, parse_keyword_t key)
{
return 0;
}
};
struct arguments_or_redirections_list : OR<
EMPTY,
Seq<argument_or_redirection, arguments_or_redirections_list> >
{
SYMBOL(symbol_arguments_or_redirections_list);
static int production(parse_token_type_t tok, parse_keyword_t key)
{
switch (tok)
{
case parse_token_type_string:
case parse_token_type_redirection:
return 1;
default:
return 0;
}
}
};
struct argument_or_redirection : OR<
Token<parse_token_type_string>,
redirection
>
{
SYMBOL(symbol_argument_or_redirection);
static int production(parse_token_type_t tok, parse_keyword_t key)
{
switch (tok)
{
case parse_token_type_string: return 0;
case parse_token_type_redirection: return 1;
default: return NO_PRODUCTION;
}
}
};
struct redirection : Token<parse_token_type_redirection>
{
SYMBOL(parse_token_type_redirection);
};
/* statement_terminator: a thin wrapper around the end token. Note that get_token() reports
   the token type parse_token_type_end rather than a symbol_* value. */
struct statement_terminator
    : Token<parse_token_type_end>
{
    SYMBOL(parse_token_type_end);
};
}
/* Fish grammar:
@ -529,7 +177,7 @@ namespace parse_symbols
# A block is a conditional, loop, or begin/end
if_statement = if_clause else_clause <END>
if_statement = if_clause else_clause <END> arguments_or_redirections_list
if_clause = <IF> job STATEMENT_TERMINATOR job_list
else_clause = <empty> |
<ELSE> else_continuation
@ -544,7 +192,7 @@ namespace parse_symbols
argument_list_nonempty = <TOK_STRING> argument_list
argument_list = <empty> | argument_list_nonempty
block_statement = block_header STATEMENT_TERMINATOR job_list <END> arguments_or_redirections_list
block_statement = block_header <TOK_END> job_list <END> arguments_or_redirections_list
block_header = for_header | while_header | function_header | begin_header
for_header = FOR var_name IN arguments_or_redirections_list
while_header = WHILE statement
@ -558,7 +206,7 @@ namespace parse_symbols
# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command"
decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement
plain_statement = COMMAND arguments_or_redirections_list
plain_statement = COMMAND arguments_or_redirections_list optional_background
arguments_or_redirections_list = <empty> |
argument_or_redirection arguments_or_redirections_list
@ -567,6 +215,8 @@ namespace parse_symbols
terminator = <TOK_END> | <TOK_BACKGROUND>
optional_background = <empty> | <TOK_BACKGROUND>
*/
#endif

586
parse_tree_construction.h Normal file
View file

@ -0,0 +1,586 @@
/**\file parse_tree_construction.h
Programmatic representation of fish code.
*/
#ifndef FISH_PARSE_TREE_CONSTRUCTION_H
#define FISH_PARSE_TREE_CONSTRUCTION_H
#include "parse_tree.h"
/* Terrifying template black magic. */
namespace parse_symbols
{
/* SYMBOL(x): expands to a static get_token() member that returns x (as a parse_token_type_t).
   Used inside each grammar struct to name the symbol it stands for. */
#define SYMBOL(x) static inline parse_token_type_t get_token() { return x; }
/* PRODUCE(X): expands to a static production() member that ignores both of its arguments
   and always returns X. */
#define PRODUCE(X) static int production(parse_token_type_t tok, parse_keyword_t key) { return X; }
/* Value returned by the production() functions when they select none of the alternatives. */
#define NO_PRODUCTION (-1)
/* Common base of the grammar symbol types. It carries only a marker typedef;
   NOTE(review): presumably template code elsewhere detects symbols through
   magic_symbol_type_t -- confirm against the consumer. */
struct Symbol
{
    typedef int magic_symbol_type_t;
};
template<parse_token_type_t WHICH>
struct Token : public Symbol
{
SYMBOL(WHICH);
};
/* Placeholder */
typedef Token<token_type_invalid> none;
typedef Token<token_type_invalid> EMPTY;
/* Seq<T0, T1, ...>: a sequence of two to six grammar elements. Unused trailing slots default
   to `none`. Each element is re-exported as t0..t5; magic_seq_type_t is a marker typedef. */
template<typename T0,
         typename T1,
         typename T2 = none,
         typename T3 = none,
         typename T4 = none,
         typename T5 = none>
struct Seq
{
    typedef int magic_seq_type_t;

    typedef T0 t0;
    typedef T1 t1;
    typedef T2 t2;
    typedef T3 t3;
    typedef T4 t4;
    typedef T5 t5;
};
/* OR<P0, P1, ...>: a choice between two to six alternative productions. Unused trailing
   slots default to `none`. Each alternative is re-exported as p0..p5; magic_or_type_t is a
   marker typedef. */
template<typename P0,
         typename P1,
         typename P2 = none,
         typename P3 = none,
         typename P4 = none,
         typename P5 = none>
struct OR
{
    typedef int magic_or_type_t;

    typedef P0 p0;
    typedef P1 p1;
    typedef P2 p2;
    typedef P3 p3;
    typedef P4 p4;
    typedef P5 p5;
};
/* Keyword<WHICH>: a grammar element standing for a specific keyword. Unlike the other
   symbols, get_token() here returns a parse_keyword_t (the template argument), not a
   parse_token_type_t. */
template<parse_keyword_t WHICH>
struct Keyword : public Symbol
{
    static inline parse_keyword_t get_token() { return WHICH; }
};
/* Forward declarations of the grammar symbols, so that productions may refer to symbols
   defined further down. Listed once each, in the order the definitions appear below. */
struct job;
struct job_continuation;
struct statement;
struct if_statement;
struct if_clause;
struct else_clause;
struct else_continuation;
struct switch_statement;
struct case_item_list;
struct case_item;
struct argument_list_nonempty;
struct argument_list;
struct block_statement;
struct block_header;
struct for_header;
struct while_header;
struct begin_header;
struct function_header;
struct boolean_statement;
struct decorated_statement;
struct plain_statement;
struct arguments_or_redirections_list;
struct argument_or_redirection;
struct redirection;
struct statement_terminator;
struct optional_background;
/* A job_list is a list of jobs, separated by semicolons or newlines.
   production() returns an index into `productions`:
     0 - empty (the list ends here),
     1 - a job followed by the rest of the list,
     2 - an end token (empty line) followed by the rest of the list,
   or NO_PRODUCTION for token types it does not recognise. */
struct job_list : public Symbol
{
    typedef OR<
        EMPTY,
        Seq<job, job_list>,
        Seq<Token<parse_token_type_end>, job_list>
    > productions;

    SYMBOL(symbol_job_list);

    static int production(parse_token_type_t token_type, parse_keyword_t token_keyword)
    {
        if (token_type == parse_token_type_string)
        {
            /* 'end' and 'else' are special: they close this job list. Any other string
               starts a job. */
            const bool closes_list = (token_keyword == parse_keyword_end || token_keyword == parse_keyword_else);
            return closes_list ? 0 : 1;
        }

        if (token_type == parse_token_type_pipe ||
            token_type == parse_token_type_redirection ||
            token_type == parse_token_type_background)
        {
            return 1;
        }

        if (token_type == parse_token_type_end)
        {
            /* Empty line */
            return 2;
        }

        if (token_type == parse_token_type_terminate)
        {
            /* No more commands, just transition to empty */
            return 0;
        }

        return NO_PRODUCTION;
    }
};
/* A job is a non-empty list of statements separated by pipes. Non-emptiness matters for
   constructs such as `if`, which require a command. It is expressed as one mandatory
   statement followed by a job_continuation, which may itself be empty. */
struct job : public Symbol
{
    typedef Seq<
        statement,
        job_continuation
    > sole_production;

    SYMBOL(symbol_job);
};
/* The tail of a job: either nothing, or a pipe token, another statement and a further
   continuation. production() returns 1 when the token is a pipe and 0 (empty) otherwise. */
struct job_continuation : public Symbol
{
    typedef OR<
        EMPTY,
        Seq<Token<parse_token_type_pipe>, statement, job_continuation>
    > productions;

    SYMBOL(symbol_job_continuation);

    static int production(parse_token_type_t token_type, parse_keyword_t token_keyword)
    {
        /* A pipe continues the job; anything else means there is no continuation */
        const bool is_pipe = (token_type == parse_token_type_pipe);
        return is_pipe ? 1 : 0;
    }
};
/* A statement is a normal command, or an if / while / and etc.
   production() returns an index into `productions`:
     0 - boolean_statement   (and / or / not)
     1 - block_statement     (for / while / function / begin)
     2 - if_statement        (if)
     3 - switch_statement    (switch)
     4 - decorated_statement (any other string, including in / command / builtin / case)
   or NO_PRODUCTION when no statement can start with the given token. */
struct statement : public Symbol
{
    typedef OR<
        boolean_statement,
        block_statement,
        if_statement,
        switch_statement,
        decorated_statement
    > productions;

    SYMBOL(symbol_statement);

    static int production(parse_token_type_t token_type, parse_keyword_t token_keyword)
    {
        switch (token_type)
        {
            case parse_token_type_string:
                switch (token_keyword)
                {
                    case parse_keyword_and:
                    case parse_keyword_or:
                    case parse_keyword_not:
                        return 0;

                    case parse_keyword_for:
                    case parse_keyword_while:
                    case parse_keyword_function:
                    case parse_keyword_begin:
                        return 1;

                    case parse_keyword_if:
                        return 2;

                    case parse_keyword_else:
                        //symbol_stack_pop();
                        return NO_PRODUCTION;

                    case parse_keyword_switch:
                        return 3;

                    case parse_keyword_end:
                        PARSER_DIE(); //todo
                        return NO_PRODUCTION;

                    // 'in' is only special within a for_header
                    case parse_keyword_in:
                    case parse_keyword_none:
                    case parse_keyword_command:
                    case parse_keyword_builtin:
                    case parse_keyword_case:
                        return 4;
                }
                /* The inner switch deliberately has no default, so the compiler can still
                   warn about unhandled keywords. Leave the outer switch and use the
                   fallback return below. */
                break;

            case parse_token_type_pipe:
            case parse_token_type_redirection:
            case parse_token_type_background:
            case parse_token_type_terminate:
                return NO_PRODUCTION;
                //parse_error(L"statement", token);

            default:
                return NO_PRODUCTION;
        }

        /* Only reached for a string token whose keyword value is not listed above.
           Previously control ran off the end of this non-void function, which is
           undefined behaviour; report "no production" instead. */
        return NO_PRODUCTION;
    }
};
/* if_statement = if_clause else_clause <END> arguments_or_redirections_list */
struct if_statement : public Symbol
{
    typedef Seq<
        if_clause,
        else_clause,
        Keyword<parse_keyword_end>,
        arguments_or_redirections_list
    > sole_production;

    SYMBOL(symbol_if_statement);
};
/* if_clause = <IF> job statement_terminator job_list */
struct if_clause : public Symbol
{
    typedef Seq<
        Keyword<parse_keyword_if>,
        job,
        statement_terminator,
        job_list
    > sole_production;

    SYMBOL(symbol_if_clause);
};
/* else_clause = <empty> | <ELSE> else_continuation
   production() returns 1 when the keyword is `else`, and 0 (empty) otherwise. */
struct else_clause : public Symbol
{
    typedef OR<
        EMPTY,
        Seq<Keyword<parse_keyword_else>, else_continuation>
    > productions;

    SYMBOL(symbol_else_clause);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        return (key == parse_keyword_else) ? 1 : 0;
    }
};
/* What may follow `else`: either another if_clause with its own else_clause ("else if"),
   or a statement terminator and a job list (a plain else body).
   production() returns 0 when the keyword is `if`, and 1 for everything else. */
struct else_continuation : public Symbol
{
    typedef OR<
        Seq<if_clause, else_clause>,
        Seq<statement_terminator, job_list>
    > productions;

    SYMBOL(symbol_else_continuation);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        return (key == parse_keyword_if) ? 0 : 1;
    }
};
/* switch_statement = <SWITCH> <string token> statement_terminator case_item_list <END> */
struct switch_statement : public Symbol
{
    typedef Seq<
        Keyword<parse_keyword_switch>,
        Token<parse_token_type_string>,
        statement_terminator,
        case_item_list,
        Keyword<parse_keyword_end>
    > sole_production;

    SYMBOL(symbol_switch_statement);
};
/* The body of a switch: a possibly empty list of case items, with blank lines allowed
   between them. production() returns:
     1 - when the keyword is `case` (a case_item followed by the rest of the list),
     2 - when the token is an end token (an empty line followed by the rest of the list),
     0 - otherwise (the list is empty / finished). */
struct case_item_list : public Symbol
{
    typedef OR<
        EMPTY,
        Seq<case_item, case_item_list>,
        Seq<Token<parse_token_type_end>, case_item_list>
    > productions;

    SYMBOL(symbol_case_item_list);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        if (key == parse_keyword_case)
        {
            return 1;
        }

        /* Not a case keyword: an end token is an empty line, anything else ends the list */
        return (tok == parse_token_type_end) ? 2 : 0;
    }
};
/* case_item = <CASE> argument_list statement_terminator job_list */
struct case_item : public Symbol
{
    typedef Seq<
        Keyword<parse_keyword_case>,
        argument_list,
        statement_terminator,
        job_list
    > sole_production;

    SYMBOL(symbol_case_item);
};
/* argument_list_nonempty = <string token> argument_list */
struct argument_list_nonempty : public Symbol
{
    typedef Seq<
        Token<parse_token_type_string>,
        argument_list
    > sole_production;

    SYMBOL(symbol_argument_list_nonempty);
};
/* argument_list = <empty> | argument_list_nonempty
   production() returns 1 when the token is a string, and 0 (empty) otherwise. */
struct argument_list : public Symbol
{
    typedef OR<
        EMPTY,
        argument_list_nonempty
    > productions;

    SYMBOL(symbol_argument_list);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        return (tok == parse_token_type_string) ? 1 : 0;
    }
};
/* block_statement = block_header statement_terminator job_list <END> arguments_or_redirections_list */
struct block_statement : public Symbol
{
    typedef Seq<
        block_header,
        statement_terminator,
        job_list,
        Keyword<parse_keyword_end>,
        arguments_or_redirections_list
    > sole_production;

    SYMBOL(symbol_block_statement);
};
/* block_header = for_header | while_header | function_header | begin_header
   production() maps the keyword to the alternative: for -> 0, while -> 1, function -> 2,
   begin -> 3. Every other keyword, including `else`, yields NO_PRODUCTION. */
struct block_header : public Symbol
{
    typedef OR<
        for_header,
        while_header,
        function_header,
        begin_header
    > productions;

    SYMBOL(symbol_block_header);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        // todo
        if (key == parse_keyword_else)
        {
            return NO_PRODUCTION;
        }

        if (key == parse_keyword_for)
        {
            return 0;
        }
        if (key == parse_keyword_while)
        {
            return 1;
        }
        if (key == parse_keyword_function)
        {
            return 2;
        }
        if (key == parse_keyword_begin)
        {
            return 3;
        }
        return NO_PRODUCTION;
    }
};
/* for_header = <FOR> <string token> <IN> arguments_or_redirections_list */
struct for_header : public Symbol
{
    typedef Seq<
        Keyword<parse_keyword_for>,
        Token<parse_token_type_string>,
        Keyword<parse_keyword_in>,
        arguments_or_redirections_list
    > sole_production;

    SYMBOL(symbol_for_header);
};
/* while_header = <WHILE> statement */
struct while_header : public Symbol
{
    typedef Seq<
        Keyword<parse_keyword_while>,
        statement
    > sole_production;

    SYMBOL(symbol_while_header);
};
/* begin_header = <BEGIN>
   The sole production is the bare keyword itself, not wrapped in a Seq. */
struct begin_header : public Symbol
{
    typedef Keyword<parse_keyword_begin>
        sole_production;

    SYMBOL(symbol_begin_header);
};
/* function_header = <FUNCTION> <string token> argument_list */
struct function_header : public Symbol
{
    typedef Seq<
        Keyword<parse_keyword_function>,
        Token<parse_token_type_string>,
        argument_list
    > sole_production;

    SYMBOL(symbol_function_header);
};
/* A boolean statement is a statement prefixed by `and`, `or` or `not`.
   production() maps the keyword to the alternative: and -> 0, or -> 1, not -> 2; any other
   keyword yields NO_PRODUCTION. The token type is not consulted. */
struct boolean_statement : public Symbol
{
    typedef OR<
        Seq<Keyword<parse_keyword_and>, statement>,
        Seq<Keyword<parse_keyword_or>, statement>,
        Seq<Keyword<parse_keyword_not>, statement>
    > productions;

    SYMBOL(symbol_boolean_statement);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        if (key == parse_keyword_and)
        {
            return 0;
        }
        if (key == parse_keyword_or)
        {
            return 1;
        }
        if (key == parse_keyword_not)
        {
            return 2;
        }
        return NO_PRODUCTION;
    }
};
/* A decorated_statement is a plain_statement, optionally preceded by the `command` or
   `builtin` keyword. production() returns 0 for `command`, 1 for `builtin`, and 2 (the
   undecorated plain_statement) for every other keyword. */
struct decorated_statement : public Symbol
{
    typedef OR<
        Seq<Keyword<parse_keyword_command>, plain_statement>,
        Seq<Keyword<parse_keyword_builtin>, plain_statement>,
        plain_statement
    > productions;

    SYMBOL(symbol_decorated_statement);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        if (key == parse_keyword_command)
        {
            return 0;
        }
        if (key == parse_keyword_builtin)
        {
            return 1;
        }
        /* No decoration keyword: undecorated plain statement */
        return 2;
    }
};
/* plain_statement = <string token> arguments_or_redirections_list optional_background */
struct plain_statement : public Symbol
{
    typedef Seq<
        Token<parse_token_type_string>,
        arguments_or_redirections_list,
        optional_background
    > sole_production;

    SYMBOL(symbol_plain_statement);
};
/* arguments_or_redirections_list = <empty> | argument_or_redirection arguments_or_redirections_list
   production() returns 1 (one more element) when the token is a string or a redirection,
   and 0 (empty) for any other token type. */
struct arguments_or_redirections_list : public Symbol
{
    typedef OR<
        EMPTY,
        Seq<argument_or_redirection, arguments_or_redirections_list>
    > productions;

    SYMBOL(symbol_arguments_or_redirections_list);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        const bool list_continues = (tok == parse_token_type_string || tok == parse_token_type_redirection);
        return list_continues ? 1 : 0;
    }
};
/* One list element: either a string token (alternative 0) or a redirection (alternative 1).
   Any other token type yields NO_PRODUCTION. */
struct argument_or_redirection : public Symbol
{
    typedef OR<
        Token<parse_token_type_string>,
        redirection
    > productions;

    SYMBOL(symbol_argument_or_redirection);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        if (tok == parse_token_type_string)
        {
            return 0;
        }
        if (tok == parse_token_type_redirection)
        {
            return 1;
        }
        return NO_PRODUCTION;
    }
};
/* redirection: stands for a single redirection token. get_token() reports the token type
   parse_token_type_redirection rather than a symbol_* value.
   NOTE(review): the other single-production symbols name their typedef `sole_production`;
   this one uses `production` -- confirm which name the consumer expects. */
struct redirection : public Symbol
{
    typedef Token<parse_token_type_redirection>
        production;

    SYMBOL(parse_token_type_redirection);
};
/* statement_terminator: stands for a single end token. get_token() reports the token type
   parse_token_type_end rather than a symbol_* value.
   NOTE(review): the other single-production symbols name their typedef `sole_production`;
   this one uses `production` -- confirm which name the consumer expects. */
struct statement_terminator : public Symbol
{
    typedef Token<parse_token_type_end>
        production;

    SYMBOL(parse_token_type_end);
};
/* optional_background = <empty> | <background token>
   production() returns 1 when the token is a background token, and 0 (empty) otherwise. */
struct optional_background : public Symbol
{
    typedef OR<
        EMPTY,
        Token<parse_token_type_background>
    > productions;

    SYMBOL(symbol_optional_background);

    static int production(parse_token_type_t tok, parse_keyword_t key)
    {
        return (tok == parse_token_type_background) ? 1 : 0;
    }
};
}
#endif

View file

@ -651,13 +651,19 @@ wcstring tok_first(const wchar_t *str)
return result;
}
int tok_get_pos(tokenizer_t *tok)
/* Returns tok->last_pos converted to int.
   NOTE(review): CHECK is a project macro; presumably it makes the function return 0 when
   tok is null -- confirm. */
int tok_get_pos(const tokenizer_t *tok)
{
    CHECK(tok, 0);

    return static_cast<int>(tok->last_pos);
}
/* Returns how far the tokenizer's read pointer (buff, measured from orig_buff) lies beyond
   last_pos, or 0 if it is not beyond it.
   NOTE(review): CHECK is a project macro; presumably it makes the function return 0 when
   tok is null -- confirm. */
size_t tok_get_extent(const tokenizer_t *tok)
{
    CHECK(tok, 0);

    const size_t cursor = tok->buff - tok->orig_buff;
    if (cursor > tok->last_pos)
    {
        return cursor - tok->last_pos;
    }

    /* The read pointer has not moved past the token start */
    return 0;
}
void tok_set_pos(tokenizer_t *tok, int pos)
{

View file

@ -142,7 +142,10 @@ int tok_has_next(tokenizer_t *tok);
/**
Returns the position of the beginning of the current token in the original string
*/
int tok_get_pos(tokenizer_t *tok);
int tok_get_pos(const tokenizer_t *tok);
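/** Returns the extent of the current token: the distance from the token's start position to the tokenizer's current read position, or 0 if the read position is not past the start */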
size_t tok_get_extent(const tokenizer_t *tok);
/**
Returns the original string to tokenizer