More work on new parser

This commit is contained in:
ridiculousfish 2013-06-23 02:09:46 -07:00
parent b771e97ac6
commit d840643cb3
3 changed files with 403 additions and 141 deletions

View file

@ -24,10 +24,18 @@ struct exec_argument_t
node_offset_t parse_node_idx; node_offset_t parse_node_idx;
sanity_id_t command_sanity_id; sanity_id_t command_sanity_id;
}; };
typedef std::vector<exec_argument_t> exec_argument_list_t;
struct exec_redirection_t struct exec_redirection_t
{ {
node_offset_t parse_node_idx;
};
typedef std::vector<exec_redirection_t> exec_redirection_list_t;
struct exec_arguments_and_redirections_t
{
exec_argument_list_t arguments;
exec_redirection_list_t redirections;
}; };
struct exec_basic_statement_t struct exec_basic_statement_t
@ -35,6 +43,9 @@ struct exec_basic_statement_t
// Node containing the command // Node containing the command
node_offset_t command_idx; node_offset_t command_idx;
// Arguments
exec_arguments_and_redirections_t arguments_and_redirections;
// Decoration // Decoration
enum enum
{ {
@ -43,8 +54,6 @@ struct exec_basic_statement_t
decoration_builtin decoration_builtin
} decoration; } decoration;
std::vector<exec_argument_t> arguments;
std::vector<exec_redirection_t> redirections;
uint16_t sanity_id; uint16_t sanity_id;
exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) exec_basic_statement_t() : command_idx(0), decoration(decoration_plain)
@ -70,10 +79,48 @@ struct exec_basic_statement_t
PARSER_DIE(); PARSER_DIE();
break; break;
} }
}
const exec_argument_list_t &arguments() const
{
return arguments_and_redirections.arguments;
}
const exec_redirection_list_t &redirections() const
{
return arguments_and_redirections.redirections;
} }
}; };
/* Execution-side record for a block statement. It currently stores only an argument list and a redirection list, exposed read-only through the two accessors below. */
struct exec_block_statement_t
{
// Arguments and redirections, stored together in one aggregate
exec_arguments_and_redirections_t arguments_and_redirections;
// Read-only view of the stored argument list
const exec_argument_list_t &arguments() const
{
return arguments_and_redirections.arguments;
}
// Read-only view of the stored redirection list
const exec_redirection_list_t &redirections() const
{
return arguments_and_redirections.redirections;
}
};
/* A job under construction: an ordered sequence of basic statements. */
struct exec_job_t
{
// List of statements (separated with pipes)
std::vector<exec_basic_statement_t> statements;
// Appends a copy of the given statement to the end of the job
void add_statement(const exec_basic_statement_t &statement)
{
statements.push_back(statement);
}
};
class parse_exec_t class parse_exec_t
{ {
parse_node_tree_t parse_tree; parse_node_tree_t parse_tree;
@ -85,6 +132,9 @@ class parse_exec_t
/* The stack of nodes as we execute them */ /* The stack of nodes as we execute them */
std::vector<exec_node_t> exec_nodes; std::vector<exec_node_t> exec_nodes;
/* The stack of jobs being built */
std::vector<exec_job_t> assembling_jobs;
/* The stack of commands being built */ /* The stack of commands being built */
std::vector<exec_basic_statement_t> assembling_statements; std::vector<exec_basic_statement_t> assembling_statements;
@ -96,6 +146,38 @@ class parse_exec_t
output->assign(src, node.source_start, node.source_length); output->assign(src, node.source_start, node.source_length);
} }
/* Returns the node of parse_tree that is child number `which` of `parent`.
   The parent is only read, so it is accepted by const reference; this allows passing nodes that are held as const references, which is how this class holds them elsewhere.
   The bound on `which` is asserted by parse_node_t::child_offset. */
const parse_node_t &get_child(const parse_node_t &parent, node_offset_t which) const
{
    return parse_tree.at(parent.child_offset(which));
}
/* Replaces the top entry of exec_nodes with up to five selected children of that entry's parse node.
   idx1..idx5 are child positions relative to the node's first child; a slot equal to NODE_OFFSET_INVALID is skipped.
   Children are pushed in reverse order, so idx1 ends up on top of the stack.
   Every pushed entry receives the sanity id of the statement on top of assembling_statements, or 0 if that stack is empty. */
void pop_push_specific(node_offset_t idx1, node_offset_t idx2 = NODE_OFFSET_INVALID, node_offset_t idx3 = NODE_OFFSET_INVALID, node_offset_t idx4 = NODE_OFFSET_INVALID, node_offset_t idx5 = NODE_OFFSET_INVALID)
{
    PARSE_ASSERT(! exec_nodes.empty());

    // Read everything we need from the top entry before removing it, so that no reference into exec_nodes outlives the pop
    const parse_node_t &parse_node = parse_tree.at(exec_nodes.back().parse_node_idx);
    const node_offset_t child_node_idx = parse_node.child_start;

    // Remove the top node
    exec_nodes.pop_back();

    const sanity_id_t command_sanity_id = assembling_statements.empty() ? 0 : assembling_statements.back().sanity_id;

    // Append the given children, backwards
    const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1};
    for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++)
    {
        const node_offset_t idx = idxs[q];
        if (idx == NODE_OFFSET_INVALID)
        {
            // Unused slot; compare against the same sentinel the default arguments use
            continue;
        }
        PARSE_ASSERT(idx < parse_node.child_count);
        exec_nodes.push_back(child_node_idx + idx);
        exec_nodes.back().command_sanity_id = command_sanity_id;
    }
}
void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) void pop_push(node_offset_t child_idx, node_offset_t child_count = 1)
{ {
PARSE_ASSERT(! exec_nodes.empty()); PARSE_ASSERT(! exec_nodes.empty());
@ -139,58 +221,97 @@ class parse_exec_t
pop_push(0, parse_node.child_count); pop_push(0, parse_node.child_count);
} }
void assemble_command(node_offset_t idx) void assemble_1_argument_or_redirection(node_offset_t idx, exec_arguments_and_redirections_t *output) const
{
// Set the command for our top basic statement
PARSE_ASSERT(! assembling_statements.empty());
assembling_statements.back().command_idx = idx;
}
void assemble_argument_or_redirection(node_offset_t idx)
{ {
const parse_node_t &node = parse_tree.at(idx); const parse_node_t &node = parse_tree.at(idx);
PARSE_ASSERT(! assembling_statements.empty()); PARSE_ASSERT(output != NULL);
exec_basic_statement_t &statement = assembling_statements.back(); PARSE_ASSERT(node.type == symbol_argument_or_redirection);
switch (node.type) PARSE_ASSERT(node.child_count == 1);
node_offset_t child_idx = node.child_offset(0);
const parse_node_t &child = parse_tree.at(child_idx);
switch (child.type)
{ {
case parse_token_type_string: case parse_token_type_string:
// Argument // Argument
{ {
exec_argument_t arg = exec_argument_t(); exec_argument_t arg = exec_argument_t();
arg.parse_node_idx = idx; arg.parse_node_idx = idx;
arg.command_sanity_id = statement.sanity_id; output->arguments.push_back(arg);
statement.arguments.push_back(arg);
} }
break; break;
case parse_token_type_redirection: case parse_token_type_redirection:
// Redirection // Redirection
{
exec_redirection_t redirect = exec_redirection_t();
redirect.parse_node_idx = idx;
output->redirections.push_back(redirect);
}
break; break;
default: default:
PARSER_DIE(); PARSER_DIE();
break; break;
} }
} }
void assembly_complete() void assemble_arguments_and_redirections(node_offset_t start_idx, exec_arguments_and_redirections_t *output) const
{ {
// Finished building a command node_offset_t idx = start_idx;
PARSE_ASSERT(! assembling_statements.empty()); for (;;)
const exec_basic_statement_t &statement = assembling_statements.back(); {
const parse_node_t &node = parse_tree.at(idx);
PARSE_ASSERT(node.type == symbol_arguments_or_redirections_list);
PARSE_ASSERT(node.child_count == 0 || node.child_count == 2);
if (node.child_count == 0)
{
// No more children
break;
}
else
{
// Skip to next child
assemble_1_argument_or_redirection(node.child_offset(0), output);
idx = node.child_offset(1);
}
}
}
/* Builds a basic statement from the plain_statement node at `idx` and appends it to the job on top of assembling_jobs.
   idx: index in parse_tree of a symbol_plain_statement node with exactly two children (child 0 is recorded as the command, child 1 is walked as the arguments/redirections list).
   decoration: passed to exec_basic_statement_t::set_decoration. */
void assemble_command_for_plain_statement(node_offset_t idx, parse_keyword_t decoration)
{
    // The statement is appended to the job on top of the stack; back() on an empty vector is undefined, so fail loudly first
    PARSE_ASSERT(! assembling_jobs.empty());

    const parse_node_t &node = parse_tree.at(idx);
    PARSE_ASSERT(node.type == symbol_plain_statement);
    PARSE_ASSERT(node.child_count == 2);

    exec_basic_statement_t statement;
    statement.set_decoration(decoration);
    statement.command_idx = node.child_offset(0);
    assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections);
    assembling_jobs.back().add_statement(statement);
}
void job_assembly_complete()
{
PARSE_ASSERT(! assembling_jobs.empty());
const exec_job_t &job = assembling_jobs.back();
if (simulating) if (simulating)
{ {
simulate_statement(statement); simulate_job(job);
} }
assembling_statements.pop_back(); assembling_jobs.pop_back();
} }
void simulate_statement(const exec_basic_statement_t &statement) void simulate_job(const exec_job_t &job)
{ {
PARSE_ASSERT(simulating); PARSE_ASSERT(simulating);
wcstring line; wcstring line;
for (size_t i=0; i < job.statements.size(); i++)
{
if (i > 0)
{
line.append(L" <pipe> ");
}
const exec_basic_statement_t &statement = job.statements.at(i);
switch (statement.decoration) switch (statement.decoration)
{ {
case exec_basic_statement_t::decoration_builtin: case exec_basic_statement_t::decoration_builtin:
@ -209,20 +330,21 @@ class parse_exec_t
get_node_string(statement.command_idx, &tmp); get_node_string(statement.command_idx, &tmp);
line.append(L"cmd:"); line.append(L"cmd:");
line.append(tmp); line.append(tmp);
for (size_t i=0; i < statement.arguments.size(); i++) for (size_t i=0; i < statement.arguments().size(); i++)
{ {
const exec_argument_t &arg = statement.arguments.at(i); const exec_argument_t &arg = statement.arguments().at(i);
get_node_string(arg.parse_node_idx, &tmp); get_node_string(arg.parse_node_idx, &tmp);
line.append(L" "); line.append(L" ");
line.append(L"arg:"); line.append(L"arg:");
line.append(tmp); line.append(tmp);
} }
}
simulation_result.push_back(line); simulation_result.push_back(line);
} }
void enter_parse_node(size_t idx); void enter_parse_node(size_t idx);
void run_top_node(void); void run_top_node(void);
exec_basic_statement_t *create_basic_statement(void); exec_job_t *create_job(void);
public: public:
parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false) parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false)
@ -231,10 +353,10 @@ class parse_exec_t
wcstring simulate(void); wcstring simulate(void);
}; };
exec_basic_statement_t *parse_exec_t::create_basic_statement() exec_job_t *parse_exec_t::create_job()
{ {
assembling_statements.push_back(exec_basic_statement_t()); assembling_jobs.push_back(exec_job_t());
return &assembling_statements.back(); return &assembling_jobs.back();
} }
void parse_exec_t::run_top_node() void parse_exec_t::run_top_node()
@ -242,7 +364,7 @@ void parse_exec_t::run_top_node()
PARSE_ASSERT(! exec_nodes.empty()); PARSE_ASSERT(! exec_nodes.empty());
exec_node_t &top = exec_nodes.back(); exec_node_t &top = exec_nodes.back();
const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx);
bool log = false; bool log = true;
if (log) if (log)
{ {
@ -254,68 +376,102 @@ void parse_exec_t::run_top_node()
switch (parse_node.type) switch (parse_node.type)
{ {
case symbol_statement_list: case symbol_job_list:
PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2);
if (parse_node.child_count == 0)
{
// No more jobs, done
pop();
}
else if (parse_tree.at(parse_node.child_start + 0).type == parse_token_type_end)
{
// Empty job, so just skip it
pop_push(1, 1);
}
else
{
// Normal job
pop_push(0, 2);
}
break;
case symbol_job:
{
PARSE_ASSERT(parse_node.child_count == 2);
exec_job_t *job = create_job();
pop_push_all(); pop_push_all();
break; break;
}
case symbol_job_continuation:
PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 3);
if (parse_node.child_count == 0)
{
// All done with this job
job_assembly_complete();
pop();
}
else
{
// Skip the pipe
pop_push(1, 2);
}
break;
case symbol_statement: case symbol_statement:
{ {
PARSE_ASSERT(parse_node.child_count == 1); PARSE_ASSERT(parse_node.child_count == 1);
// See if we're just an empty statement
const parse_node_t &child = parse_tree.at(parse_node.child_start + 0);
if (child.type == parse_token_type_end)
{
// Empty statement
pop();
}
else
{
// We have a statement to execute
pop_push_all(); pop_push_all();
break;
} }
case symbol_block_statement:
{
PARSE_ASSERT(parse_node.child_count == 5);
pop_push_specific(0, 2, 4);
break;
}
case symbol_block_header:
{
PARSE_ASSERT(parse_node.child_count == 1);
pop_push_all();
break;
}
case symbol_function_header:
{
PARSE_ASSERT(parse_node.child_count == 3);
//pop_push_all();
pop();
break; break;
} }
case symbol_decorated_statement: case symbol_decorated_statement:
{ {
PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2); PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2);
exec_basic_statement_t *cmd = create_basic_statement();
cmd->set_decoration(parse_node.tag);
// Push the last node (skip any decoration) node_offset_t plain_statement_idx = parse_node.child_offset(parse_node.child_count - 1);
pop_push(parse_node.child_count - 1, 1); parse_keyword_t decoration = static_cast<parse_keyword_t>(parse_node.tag);
assemble_command_for_plain_statement(plain_statement_idx, decoration);
pop();
break; break;
} }
// The following symbols should be handled by their parents, i.e. never pushed on our stack
case symbol_plain_statement: case symbol_plain_statement:
PARSE_ASSERT(parse_node.child_count == 3);
// Extract the command
PARSE_ASSERT(! assembling_statements.empty());
assemble_command(parse_node.child_start + 0);
// Jump to statement list, then terminator
pop_push(1, 2);
break;
case symbol_arguments_or_redirections_list: case symbol_arguments_or_redirections_list:
PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2);
pop_push_all();
break;
case symbol_argument_or_redirection: case symbol_argument_or_redirection:
PARSE_ASSERT(parse_node.child_count == 1); PARSER_DIE();
assemble_argument_or_redirection(parse_node.child_start + 0);
pop();
break; break;
case parse_token_type_end: case parse_token_type_end:
PARSE_ASSERT(parse_node.child_count == 0); PARSE_ASSERT(parse_node.child_count == 0);
assembly_complete();
pop(); pop();
break; break;
default: default:
fprintf(stderr, "Unhandled token type %ld\n", (long)parse_node.type); fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), top.parse_node_idx);
PARSER_DIE(); PARSER_DIE();
break; break;

View file

@ -38,13 +38,16 @@ wcstring parse_error_t::describe(const wcstring &src) const
return result; return result;
} }
static wcstring token_type_description(parse_token_type_t type) wcstring token_type_description(parse_token_type_t type)
{ {
switch (type) switch (type)
{ {
case token_type_invalid: return L"invalid"; case token_type_invalid: return L"invalid";
case symbol_statement_list: return L"statement_list"; case symbol_job_list: return L"job_list";
case symbol_job: return L"job";
case symbol_job_continuation: return L"job_continuation";
case symbol_statement: return L"statement"; case symbol_statement: return L"statement";
case symbol_block_statement: return L"block_statement"; case symbol_block_statement: return L"block_statement";
case symbol_block_header: return L"block_header"; case symbol_block_header: return L"block_header";
@ -70,6 +73,30 @@ static wcstring token_type_description(parse_token_type_t type)
} }
} }
/* Returns the textual name of a parse keyword. Values without a case below produce a string of the form "Unknown keyword type N". */
wcstring keyword_description(parse_keyword_t k)
{
    wcstring name;
    switch (k)
    {
        case parse_keyword_none:
            name = L"none";
            break;
        case parse_keyword_if:
            name = L"if";
            break;
        case parse_keyword_else:
            name = L"else";
            break;
        case parse_keyword_for:
            name = L"for";
            break;
        case parse_keyword_in:
            name = L"in";
            break;
        case parse_keyword_while:
            name = L"while";
            break;
        case parse_keyword_begin:
            name = L"begin";
            break;
        case parse_keyword_function:
            name = L"function";
            break;
        case parse_keyword_switch:
            name = L"switch";
            break;
        case parse_keyword_end:
            name = L"end";
            break;
        case parse_keyword_and:
            name = L"and";
            break;
        case parse_keyword_or:
            name = L"or";
            break;
        case parse_keyword_not:
            name = L"not";
            break;
        case parse_keyword_command:
            name = L"command";
            break;
        case parse_keyword_builtin:
            name = L"builtin";
            break;
        default:
            // Not a keyword we know by name; report its numeric value instead
            name = format_string(L"Unknown keyword type %ld", static_cast<long>(k));
            break;
    }
    return name;
}
wcstring parse_node_t::describe(void) const wcstring parse_node_t::describe(void) const
{ {
wcstring result = token_type_description(type); wcstring result = token_type_description(type);
@ -83,8 +110,20 @@ struct parse_token_t
enum parse_keyword_t keyword; // Any keyword represented by this parser enum parse_keyword_t keyword; // Any keyword represented by this parser
size_t source_start; size_t source_start;
size_t source_length; size_t source_length;
wcstring describe() const;
}; };
/* Describes this token as its type name, followed by " <keyword>" when the token carries a keyword other than parse_keyword_none. */
wcstring parse_token_t::describe(void) const
{
    wcstring description = token_type_description(type);
    if (keyword == parse_keyword_none)
    {
        // No keyword: the type name alone is the description
        return description;
    }
    const wcstring keyword_name = keyword_description(keyword);
    append_format(description, L" <%ls>", keyword_name.c_str());
    return description;
}
// Convert from tokenizer_t's token type to our token // Convert from tokenizer_t's token type to our token
static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type) static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type)
{ {
@ -124,12 +163,12 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
const size_t spacesPerIndent = 2; const size_t spacesPerIndent = 2;
// unindent statement lists by 1 to flatten them // unindent statement lists by 1 to flatten them
if (node.type == symbol_statement_list || node.type == symbol_arguments_or_redirections_list) if (node.type == symbol_job_list || node.type == symbol_arguments_or_redirections_list)
{ {
if (indent > 0) indent -= 1; if (indent > 0) indent -= 1;
} }
append_format(*result, L"%2lu ", *line); append_format(*result, L"%2lu - %l2u ", *line, start);
result->append(indent * spacesPerIndent, L' ');; result->append(indent * spacesPerIndent, L' ');;
result->append(node.describe()); result->append(node.describe());
if (node.child_count > 0) if (node.child_count > 0)
@ -190,17 +229,19 @@ class parse_ll_t
parse_ll_t() : fatal_errored(false) parse_ll_t() : fatal_errored(false)
{ {
// initial node // initial node
parse_stack_element_t elem = symbol_statement_list; parse_stack_element_t elem = symbol_job_list;
elem.node_idx = 0; elem.node_idx = 0;
symbol_stack.push_back(elem); // goal token symbol_stack.push_back(elem); // goal token
nodes.push_back(parse_node_t(symbol_statement_list)); nodes.push_back(parse_node_t(symbol_job_list));
} }
bool top_node_match_token(parse_token_t token); bool top_node_match_token(parse_token_t token);
// implementation of certain parser constructions // implementation of certain parser constructions
void accept_token(parse_token_t token); void accept_token(parse_token_t token, const wcstring &src);
void accept_token_statement_list(parse_token_t token); void accept_token_job_list(parse_token_t token);
void accept_token_job(parse_token_t token);
void accept_token_job_continuation(parse_token_t token);
void accept_token_statement(parse_token_t token); void accept_token_statement(parse_token_t token);
void accept_token_block_header(parse_token_t token); void accept_token_block_header(parse_token_t token);
void accept_token_boolean_statement(parse_token_t token); void accept_token_boolean_statement(parse_token_t token);
@ -289,7 +330,7 @@ class parse_ll_t
void parse_ll_t::token_unhandled(parse_token_t token, const char *function) void parse_ll_t::token_unhandled(parse_token_t token, const char *function)
{ {
fprintf(stderr, "Unhandled token with type %d in function %s\n", (int)token.type, function); fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function);
PARSER_DIE(); PARSER_DIE();
} }
@ -304,17 +345,33 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
fatal_errored = true; fatal_errored = true;
} }
void parse_ll_t::accept_token_statement_list(parse_token_t token) void parse_ll_t::accept_token_job_list(parse_token_t token)
{ {
PARSE_ASSERT(stack_top_type() == symbol_statement_list); PARSE_ASSERT(stack_top_type() == symbol_job_list);
switch (token.type) switch (token.type)
{ {
case parse_token_type_string: case parse_token_type_string:
// 'end' is special
if (token.keyword == parse_keyword_end)
{
// End this job list
symbol_stack_pop_push();
}
else
{
// Normal string
symbol_stack_pop_push(symbol_job, symbol_job_list);
}
break;
case parse_token_type_pipe: case parse_token_type_pipe:
case parse_token_type_redirection: case parse_token_type_redirection:
case parse_token_background: case parse_token_background:
symbol_stack_pop_push(symbol_job, symbol_job_list);
break;
case parse_token_type_end: case parse_token_type_end:
symbol_stack_pop_push(symbol_statement, symbol_statement_list); symbol_stack_pop_push(parse_token_type_end, symbol_job_list);
break; break;
case parse_token_type_terminate: case parse_token_type_terminate:
@ -328,6 +385,30 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token)
} }
} }
/* Expands the symbol_job on top of the symbol stack into symbol_statement followed by symbol_job_continuation. The expansion is the same for every token; `token` is not inspected. */
void parse_ll_t::accept_token_job(parse_token_t token)
{
PARSE_ASSERT(stack_top_type() == symbol_job);
symbol_stack_pop_push(symbol_statement, symbol_job_continuation);
}
/* Expands the symbol_job_continuation on top of the symbol stack. A pipe token extends the job with another statement and a further continuation; any other token selects the empty production. */
void parse_ll_t::accept_token_job_continuation(parse_token_t token)
{
    PARSE_ASSERT(stack_top_type() == symbol_job_continuation);

    if (token.type != parse_token_type_pipe)
    {
        // Not a pipe, no job continuation
        symbol_stack_pop_push();
        return;
    }

    // Pipe, continuation
    symbol_stack_pop_push(parse_token_type_pipe, symbol_statement, symbol_job_continuation);
}
void parse_ll_t::accept_token_statement(parse_token_t token) void parse_ll_t::accept_token_statement(parse_token_t token)
{ {
PARSE_ASSERT(stack_top_type() == symbol_statement); PARSE_ASSERT(stack_top_type() == symbol_statement);
@ -343,23 +424,11 @@ void parse_ll_t::accept_token_statement(parse_token_t token)
break; break;
case parse_keyword_if: case parse_keyword_if:
symbol_stack_pop_push(symbol_if_header);
break;
case parse_keyword_for: case parse_keyword_for:
symbol_stack_pop_push(symbol_for_header);
break;
case parse_keyword_while: case parse_keyword_while:
symbol_stack_pop_push(symbol_while_header);
break;
case parse_keyword_begin:
symbol_stack_pop_push(symbol_begin_header);
break;
case parse_keyword_function: case parse_keyword_function:
symbol_stack_pop_push(symbol_function_header); case parse_keyword_begin:
symbol_stack_pop_push(symbol_block_statement);
break; break;
case parse_keyword_else: case parse_keyword_else:
@ -370,7 +439,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token)
break; break;
case parse_keyword_end: case parse_keyword_end:
// TODO PARSER_DIE(); //todo
break; break;
// 'in' is only special within a for_header // 'in' is only special within a for_header
@ -384,11 +453,6 @@ void parse_ll_t::accept_token_statement(parse_token_t token)
} }
break; break;
case parse_token_type_end:
// Empty line, or just a semicolon
symbol_stack_pop_push(parse_token_type_end);
break;
case parse_token_type_pipe: case parse_token_type_pipe:
case parse_token_type_redirection: case parse_token_type_redirection:
case parse_token_background: case parse_token_background:
@ -415,7 +479,7 @@ void parse_ll_t::accept_token_block_header(parse_token_t token)
break; break;
case parse_keyword_else: case parse_keyword_else:
//todo PARSER_DIE(); //todo
break; break;
case parse_keyword_for: case parse_keyword_for:
@ -508,7 +572,7 @@ void parse_ll_t::accept_token_decorated_statement(parse_token_t token)
void parse_ll_t::accept_token_plain_statement(parse_token_t token) void parse_ll_t::accept_token_plain_statement(parse_token_t token)
{ {
PARSE_ASSERT(stack_top_type() == symbol_plain_statement); PARSE_ASSERT(stack_top_type() == symbol_plain_statement);
symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end); symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list);
} }
void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token) void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token)
@ -588,15 +652,23 @@ bool parse_ll_t::top_node_match_token(parse_token_t token)
symbol_stack.pop_back(); symbol_stack.pop_back();
result = true; result = true;
} }
else if (token.type == parse_token_type_pipe)
{
// Pipes are primitive
symbol_stack.pop_back();
result = true;
}
} }
return result; return result;
} }
void parse_ll_t::accept_token(parse_token_t token) void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
{ {
if (1) bool logit = true;
if (logit)
{ {
fprintf(stderr, "Accept token of type %ls\n", token_type_description(token.type).c_str()); const wcstring txt = wcstring(src, token.source_start, token.source_length);
fprintf(stderr, "Accept token %ls\n", token.describe().c_str());
} }
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
PARSE_ASSERT(! symbol_stack.empty()); PARSE_ASSERT(! symbol_stack.empty());
@ -605,6 +677,10 @@ void parse_ll_t::accept_token(parse_token_t token)
{ {
if (top_node_match_token(token)) if (top_node_match_token(token))
{ {
if (logit)
{
fprintf(stderr, "Consumed token %ls\n", token.describe().c_str());
}
consumed = true; consumed = true;
break; break;
} }
@ -612,8 +688,16 @@ void parse_ll_t::accept_token(parse_token_t token)
switch (stack_top_type()) switch (stack_top_type())
{ {
/* Symbols */ /* Symbols */
case symbol_statement_list: case symbol_job_list:
accept_token_statement_list(token); accept_token_job_list(token);
break;
case symbol_job:
accept_token_job(token);
break;
case symbol_job_continuation:
accept_token_job_continuation(token);
break; break;
case symbol_statement: case symbol_statement:
@ -621,7 +705,7 @@ void parse_ll_t::accept_token(parse_token_t token)
break; break;
case symbol_block_statement: case symbol_block_statement:
symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list); symbol_stack_pop_push(symbol_block_header, parse_token_type_end, symbol_job_list, parse_keyword_end, symbol_arguments_or_redirections_list);
break; break;
case symbol_block_header: case symbol_block_header:
@ -644,7 +728,7 @@ void parse_ll_t::accept_token(parse_token_t token)
break; break;
case symbol_function_header: case symbol_function_header:
symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end); symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_arguments_or_redirections_list);
break; break;
case symbol_boolean_statement: case symbol_boolean_statement:
@ -673,7 +757,8 @@ void parse_ll_t::accept_token(parse_token_t token)
break; break;
default: default:
fprintf(stderr, "Bailing with token type %ls\n", token_type_description(token.type).c_str()); fprintf(stderr, "Bailing with token type %ls and stack top %ls\n", token_type_description(token.type).c_str(), token_type_description(stack_top_type()).c_str());
exit_without_destructors(EXIT_FAILURE);
break; break;
} }
} }
@ -741,7 +826,7 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_
token.source_start = (size_t)tok_start; token.source_start = (size_t)tok_start;
token.source_length = wcslen(tok_txt); token.source_length = wcslen(tok_txt);
token.keyword = keyword_for_token(tok_type, tok_txt); token.keyword = keyword_for_token(tok_type, tok_txt);
this->parser->accept_token(token); this->parser->accept_token(token, str);
} }
wcstring result = dump_tree(this->parser->nodes, str); wcstring result = dump_tree(this->parser->nodes, str);

View file

@ -20,6 +20,7 @@
class parse_node_t; class parse_node_t;
typedef std::vector<parse_node_t> parse_node_tree_t; typedef std::vector<parse_node_t> parse_node_tree_t;
typedef size_t node_offset_t; typedef size_t node_offset_t;
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
struct parse_error_t struct parse_error_t
{ {
@ -51,7 +52,9 @@ enum parse_token_type_t
token_type_invalid, token_type_invalid,
// Non-terminal tokens // Non-terminal tokens
symbol_statement_list, symbol_job_list,
symbol_job,
symbol_job_continuation,
symbol_statement, symbol_statement,
symbol_block_statement, symbol_block_statement,
symbol_block_header, symbol_block_header,
@ -96,6 +99,9 @@ enum parse_keyword_t
parse_keyword_builtin parse_keyword_builtin
}; };
wcstring token_type_description(parse_token_type_t type);
wcstring keyword_description(parse_keyword_t type);
/** Base class for nodes of a parse tree */ /** Base class for nodes of a parse tree */
class parse_node_t class parse_node_t
{ {
@ -125,30 +131,45 @@ class parse_node_t
explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0) explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0)
{ {
} }
/* Returns the index of this node's child number `which`, computed as child_start + which. Asserts that `which` is less than child_count. */
node_offset_t child_offset(node_offset_t which) const
{
PARSE_ASSERT(which < child_count);
return child_start + which;
}
}; };
/* Fish grammar: /* Fish grammar:
# A statement_list is a list of statements, separated by semicolons or newlines # A job_list is a list of jobs, separated by semicolons or newlines
statement_list = <empty> | job_list = <empty> |
statement statement_list <TOK_END> job_list |
job job_list
# A statement is a normal job, or an if / while / and etc, or just a nothing (i.e. newline) # A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation
statement = boolean_statement | block_statement | decorated_statement | <TOK_END> job = statement job_continuation
job_continuation = <empty> |
<TOK_PIPE> statement job_continuation
# A statement is a normal command, or an if / while / and etc
statement = boolean_statement | block_statement | decorated_statement
# A block is a conditional, loop, or begin/end # A block is a conditional, loop, or begin/end
block_statement = block_header statement_list END arguments_or_redirections_list block_statement = block_header STATEMENT_TERMINATOR job_list <END> arguments_or_redirections_list
block_header = if_header | for_header | while_header | function_header | begin_header block_header = if_header | for_header | while_header | function_header | begin_header
if_header = IF statement if_header = IF statement
for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR for_header = FOR var_name IN arguments_or_redirections_list
while_header = WHILE statement while_header = WHILE statement
begin_header = BEGIN STATEMENT_TERMINATOR begin_header = BEGIN
function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR function_header = FUNCTION function_name arguments_or_redirections_list
#(TODO: functions should not support taking redirections in their arguments)
# A boolean statement is AND or OR or NOT # A boolean statement is AND or OR or NOT
@ -157,7 +178,7 @@ class parse_node_t
# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command" # A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command"
decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement
plain_statement = command arguments_or_redirections_list terminator plain_statement = command arguments_or_redirections_list
arguments_or_redirections_list = <empty> | arguments_or_redirections_list = <empty> |
argument_or_redirection arguments_or_redirections_list argument_or_redirection arguments_or_redirections_list