Mirror of https://github.com/fish-shell/fish-shell, synced 2024-12-25 20:33:08 +00:00
More work on new parser
This commit is contained in: parent b771e97ac6, commit d840643cb3
3 changed files with 403 additions and 141 deletions
parse_exec.cpp (328 changes)
@@ -24,10 +24,18 @@ struct exec_argument_t
node_offset_t parse_node_idx;
sanity_id_t command_sanity_id;
};
typedef std::vector<exec_argument_t> exec_argument_list_t;

struct exec_redirection_t
{
node_offset_t parse_node_idx;
};
typedef std::vector<exec_redirection_t> exec_redirection_list_t;

struct exec_arguments_and_redirections_t
{
exec_argument_list_t arguments;
exec_redirection_list_t redirections;
};

struct exec_basic_statement_t

@@ -35,6 +43,9 @@ struct exec_basic_statement_t
// Node containing the command
node_offset_t command_idx;

// Arguments
exec_arguments_and_redirections_t arguments_and_redirections;

// Decoration
enum
{

@@ -43,8 +54,6 @@ struct exec_basic_statement_t
decoration_builtin
} decoration;

std::vector<exec_argument_t> arguments;
std::vector<exec_redirection_t> redirections;
uint16_t sanity_id;

exec_basic_statement_t() : command_idx(0), decoration(decoration_plain)

@@ -70,10 +79,48 @@ struct exec_basic_statement_t
PARSER_DIE();
break;
}

}

const exec_argument_list_t &arguments() const
{
return arguments_and_redirections.arguments;
}

const exec_redirection_list_t &redirections() const
{
return arguments_and_redirections.redirections;
}
};

struct exec_block_statement_t
{
// Arguments
exec_arguments_and_redirections_t arguments_and_redirections;

const exec_argument_list_t &arguments() const
{
return arguments_and_redirections.arguments;
}

const exec_redirection_list_t &redirections() const
{
return arguments_and_redirections.redirections;
}

};

struct exec_job_t
{
// List of statements (separated with pipes)
std::vector<exec_basic_statement_t> statements;

void add_statement(const exec_basic_statement_t &statement)
{
statements.push_back(statement);
}
};


class parse_exec_t
{
parse_node_tree_t parse_tree;

@@ -85,6 +132,9 @@ class parse_exec_t
/* The stack of nodes as we execute them */
std::vector<exec_node_t> exec_nodes;

/* The stack of jobs being built */
std::vector<exec_job_t> assembling_jobs;

/* The stack of commands being built */
std::vector<exec_basic_statement_t> assembling_statements;

@@ -95,7 +145,39 @@ class parse_exec_t
PARSE_ASSERT(node.source_start + node.source_length <= src.size());
output->assign(src, node.source_start, node.source_length);
}

const parse_node_t &get_child(parse_node_t &parent, node_offset_t which) const
{
return parse_tree.at(parent.child_offset(which));
}

void pop_push_specific(node_offset_t idx1, node_offset_t idx2 = NODE_OFFSET_INVALID, node_offset_t idx3 = NODE_OFFSET_INVALID, node_offset_t idx4 = NODE_OFFSET_INVALID, node_offset_t idx5 = NODE_OFFSET_INVALID)
{
PARSE_ASSERT(! exec_nodes.empty());
// Figure out the offset of the children
exec_node_t &top = exec_nodes.back();
const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx);
node_offset_t child_node_idx = parse_node.child_start;

// Remove the top node
exec_nodes.pop_back();

// Append the given children, backwards
sanity_id_t command_sanity_id = assembling_statements.empty() ? 0 : assembling_statements.back().sanity_id;
const node_offset_t idxs[] = {idx5, idx4, idx3, idx2, idx1};
for (size_t q=0; q < sizeof idxs / sizeof *idxs; q++)
{
node_offset_t idx = idxs[q];
if (idx != (node_offset_t)(-1))
{
PARSE_ASSERT(idx < parse_node.child_count);
exec_nodes.push_back(child_node_idx + idx);
exec_nodes.back().command_sanity_id = command_sanity_id;
}
}

}

void pop_push(node_offset_t child_idx, node_offset_t child_count = 1)
{
PARSE_ASSERT(! exec_nodes.empty());
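A rough trace of pop_push_specific, as it is used further down for symbol_block_statement (this is an editor's illustration, not part of the diff). For a block_statement node whose five children are block_header, a terminator, job_list, the end keyword, and an arguments_or_redirections_list:

    pop_push_specific(0, 2, 4)
      pops the block_statement node, then pushes child 4, child 2, child 0, in that order,
      so child 0 (the block_header) ends up on top of exec_nodes and runs first,
      followed by the job_list and then the trailing arguments_or_redirections_list;
      the terminator and the end keyword are never pushed.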
@@ -139,90 +221,130 @@ class parse_exec_t
pop_push(0, parse_node.child_count);
}

void assemble_command(node_offset_t idx)
{
// Set the command for our top basic statement
PARSE_ASSERT(! assembling_statements.empty());
assembling_statements.back().command_idx = idx;
}

void assemble_argument_or_redirection(node_offset_t idx)
void assemble_1_argument_or_redirection(node_offset_t idx, exec_arguments_and_redirections_t *output) const
{
const parse_node_t &node = parse_tree.at(idx);
PARSE_ASSERT(! assembling_statements.empty());
exec_basic_statement_t &statement = assembling_statements.back();
switch (node.type)
PARSE_ASSERT(output != NULL);
PARSE_ASSERT(node.type == symbol_argument_or_redirection);
PARSE_ASSERT(node.child_count == 1);
node_offset_t child_idx = node.child_offset(0);
const parse_node_t &child = parse_tree.at(child_idx);
switch (child.type)
{
case parse_token_type_string:
// Argument
{
exec_argument_t arg = exec_argument_t();
arg.parse_node_idx = idx;
arg.command_sanity_id = statement.sanity_id;
statement.arguments.push_back(arg);
output->arguments.push_back(arg);
}
break;

case parse_token_type_redirection:
// Redirection
{
exec_redirection_t redirect = exec_redirection_t();
redirect.parse_node_idx = idx;
output->redirections.push_back(redirect);
}
break;

default:
PARSER_DIE();
break;
}

}

void assembly_complete()
void assemble_arguments_and_redirections(node_offset_t start_idx, exec_arguments_and_redirections_t *output) const
{
// Finished building a command
PARSE_ASSERT(! assembling_statements.empty());
const exec_basic_statement_t &statement = assembling_statements.back();
node_offset_t idx = start_idx;
for (;;)
{
const parse_node_t &node = parse_tree.at(idx);
PARSE_ASSERT(node.type == symbol_arguments_or_redirections_list);
PARSE_ASSERT(node.child_count == 0 || node.child_count == 2);
if (node.child_count == 0)
{
// No more children
break;
}
else
{
// Skip to next child
assemble_1_argument_or_redirection(node.child_offset(0), output);
idx = node.child_offset(1);
}
}
}

void assemble_command_for_plain_statement(node_offset_t idx, parse_keyword_t decoration)
{
const parse_node_t &node = parse_tree.at(idx);
PARSE_ASSERT(node.type == symbol_plain_statement);
PARSE_ASSERT(node.child_count == 2);
exec_basic_statement_t statement;
statement.set_decoration(decoration);
statement.command_idx = node.child_offset(0);
assemble_arguments_and_redirections(node.child_offset(1), &statement.arguments_and_redirections);
assembling_jobs.back().add_statement(statement);
}

void job_assembly_complete()
{
PARSE_ASSERT(! assembling_jobs.empty());
const exec_job_t &job = assembling_jobs.back();

if (simulating)
{
simulate_statement(statement);
simulate_job(job);
}
assembling_statements.pop_back();
assembling_jobs.pop_back();
}

void simulate_statement(const exec_basic_statement_t &statement)
void simulate_job(const exec_job_t &job)
{
PARSE_ASSERT(simulating);
wcstring line;
switch (statement.decoration)
for (size_t i=0; i < job.statements.size(); i++)
{
case exec_basic_statement_t::decoration_builtin:
line.append(L"<builtin> ");
break;

case exec_basic_statement_t::decoration_command:
line.append(L"<command> ");
break;
if (i > 0)
{
line.append(L" <pipe> ");
}
const exec_basic_statement_t &statement = job.statements.at(i);
switch (statement.decoration)
{
case exec_basic_statement_t::decoration_builtin:
line.append(L"<builtin> ");
break;

default:
break;
}

wcstring tmp;
get_node_string(statement.command_idx, &tmp);
line.append(L"cmd:");
line.append(tmp);
for (size_t i=0; i < statement.arguments.size(); i++)
{
const exec_argument_t &arg = statement.arguments.at(i);
get_node_string(arg.parse_node_idx, &tmp);
line.append(L" ");
line.append(L"arg:");
case exec_basic_statement_t::decoration_command:
line.append(L"<command> ");
break;

default:
break;
}

wcstring tmp;
get_node_string(statement.command_idx, &tmp);
line.append(L"cmd:");
line.append(tmp);
for (size_t i=0; i < statement.arguments().size(); i++)
{
const exec_argument_t &arg = statement.arguments().at(i);
get_node_string(arg.parse_node_idx, &tmp);
line.append(L" ");
line.append(L"arg:");
line.append(tmp);
}
}
simulation_result.push_back(line);
simulation_result.push_back(line);
}

void enter_parse_node(size_t idx);
void run_top_node(void);
exec_basic_statement_t *create_basic_statement(void);
exec_job_t *create_job(void);

public:
parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false)
@@ -231,10 +353,10 @@ class parse_exec_t
wcstring simulate(void);
};

exec_basic_statement_t *parse_exec_t::create_basic_statement()
exec_job_t *parse_exec_t::create_job()
{
assembling_statements.push_back(exec_basic_statement_t());
return &assembling_statements.back();
assembling_jobs.push_back(exec_job_t());
return &assembling_jobs.back();
}

void parse_exec_t::run_top_node()

@@ -242,7 +364,7 @@ void parse_exec_t::run_top_node()
PARSE_ASSERT(! exec_nodes.empty());
exec_node_t &top = exec_nodes.back();
const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx);
bool log = false;
bool log = true;

if (log)
{
@@ -254,68 +376,102 @@ void parse_exec_t::run_top_node()

switch (parse_node.type)
{
case symbol_statement_list:
case symbol_job_list:
PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2);
if (parse_node.child_count == 0)
{
// No more jobs, done
pop();
}
else if (parse_tree.at(parse_node.child_start + 0).type == parse_token_type_end)
{
// Empty job, so just skip it
pop_push(1, 1);
}
else
{
// Normal job
pop_push(0, 2);
}
break;

case symbol_job:
{
PARSE_ASSERT(parse_node.child_count == 2);
exec_job_t *job = create_job();
pop_push_all();
break;

case symbol_statement:
{
PARSE_ASSERT(parse_node.child_count == 1);
// See if we're just an empty statement
const parse_node_t &child = parse_tree.at(parse_node.child_start + 0);
if (child.type == parse_token_type_end)
}

case symbol_job_continuation:
PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 3);
if (parse_node.child_count == 0)
{
// Empty statement
// All done with this job
job_assembly_complete();
pop();
}
else
{
// We have a statement to execute
pop_push_all();
// Skip the pipe
pop_push(1, 2);
}
break;

case symbol_statement:
{
PARSE_ASSERT(parse_node.child_count == 1);
pop_push_all();
break;
}

case symbol_block_statement:
{
PARSE_ASSERT(parse_node.child_count == 5);
pop_push_specific(0, 2, 4);
break;
}

case symbol_block_header:
{
PARSE_ASSERT(parse_node.child_count == 1);
pop_push_all();
break;
}

case symbol_function_header:
{
PARSE_ASSERT(parse_node.child_count == 3);
//pop_push_all();
pop();
break;
}

case symbol_decorated_statement:
{
PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 );
exec_basic_statement_t *cmd = create_basic_statement();
cmd->set_decoration(parse_node.tag);
PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2);

// Push the last node (skip any decoration)
pop_push(parse_node.child_count - 1, 1);
node_offset_t plain_statement_idx = parse_node.child_offset(parse_node.child_count - 1);
parse_keyword_t decoration = static_cast<parse_keyword_t>(parse_node.tag);
assemble_command_for_plain_statement(plain_statement_idx, decoration);
pop();
break;
}


// The following symbols should be handled by their parents, i.e. never pushed on our stack
case symbol_plain_statement:
PARSE_ASSERT(parse_node.child_count == 3);
// Extract the command
PARSE_ASSERT(! assembling_statements.empty());
assemble_command(parse_node.child_start + 0);
// Jump to statement list, then terminator
pop_push(1, 2);
break;

case symbol_arguments_or_redirections_list:
PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2);
pop_push_all();
break;

case symbol_argument_or_redirection:
PARSE_ASSERT(parse_node.child_count == 1);
assemble_argument_or_redirection(parse_node.child_start + 0);
pop();
PARSER_DIE();
break;

case parse_token_type_end:
PARSE_ASSERT(parse_node.child_count == 0);
assembly_complete();
pop();
break;

default:
fprintf(stderr, "Unhandled token type %ld\n", (long)parse_node.type);
fprintf(stderr, "Unhandled token type %ls at index %ld\n", token_type_description(parse_node.type).c_str(), top.parse_node_idx);
PARSER_DIE();
break;
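To make the new simulate_job output concrete, here is roughly what it would append to simulation_result for a pipeline such as `echo hello world | grep hello`, assuming both statements parse with the default (plain) decoration; the input is an editor's illustration, not taken from the diff:

    cmd:echo arg:hello arg:world <pipe> cmd:grep arg:hello

A `<builtin>` or `<command>` prefix would appear before the corresponding cmd: entry when the statement is decorated with the builtin or command keyword.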
parse_tree.cpp (173 changes)
@@ -38,13 +38,16 @@ wcstring parse_error_t::describe(const wcstring &src) const
return result;
}

static wcstring token_type_description(parse_token_type_t type)
wcstring token_type_description(parse_token_type_t type)
{
switch (type)
{
case token_type_invalid: return L"invalid";

case symbol_statement_list: return L"statement_list";
case symbol_job_list: return L"job_list";
case symbol_job: return L"job";
case symbol_job_continuation: return L"job_continuation";

case symbol_statement: return L"statement";
case symbol_block_statement: return L"block_statement";
case symbol_block_header: return L"block_header";

@@ -70,6 +73,30 @@ static wcstring token_type_description(parse_token_type_t type)
}
}

wcstring keyword_description(parse_keyword_t k)
{
switch (k)
{
case parse_keyword_none: return L"none";
case parse_keyword_if: return L"if";
case parse_keyword_else: return L"else";
case parse_keyword_for: return L"for";
case parse_keyword_in: return L"in";
case parse_keyword_while: return L"while";
case parse_keyword_begin: return L"begin";
case parse_keyword_function: return L"function";
case parse_keyword_switch: return L"switch";
case parse_keyword_end: return L"end";
case parse_keyword_and: return L"and";
case parse_keyword_or: return L"or";
case parse_keyword_not: return L"not";
case parse_keyword_command: return L"command";
case parse_keyword_builtin: return L"builtin";
default:
return format_string(L"Unknown keyword type %ld", static_cast<long>(k));
}
}

wcstring parse_node_t::describe(void) const
{
wcstring result = token_type_description(type);

@@ -83,8 +110,20 @@ struct parse_token_t
enum parse_keyword_t keyword; // Any keyword represented by this parser
size_t source_start;
size_t source_length;

wcstring describe() const;
};

wcstring parse_token_t::describe(void) const
{
wcstring result = token_type_description(type);
if (keyword != parse_keyword_none)
{
append_format(result, L" <%ls>", keyword_description(keyword).c_str());
}
return result;
}

// Convert from tokenizer_t's token type to our token
static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type)
{
@@ -124,12 +163,12 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
const size_t spacesPerIndent = 2;

// unindent statement lists by 1 to flatten them
if (node.type == symbol_statement_list || node.type == symbol_arguments_or_redirections_list)
if (node.type == symbol_job_list || node.type == symbol_arguments_or_redirections_list)
{
if (indent > 0) indent -= 1;
}

append_format(*result, L"%2lu ", *line);
append_format(*result, L"%2lu - %l2u ", *line, start);
result->append(indent * spacesPerIndent, L' ');;
result->append(node.describe());
if (node.child_count > 0)

@@ -190,17 +229,19 @@ class parse_ll_t
parse_ll_t() : fatal_errored(false)
{
// initial node
parse_stack_element_t elem = symbol_statement_list;
parse_stack_element_t elem = symbol_job_list;
elem.node_idx = 0;
symbol_stack.push_back(elem); // goal token
nodes.push_back(parse_node_t(symbol_statement_list));
nodes.push_back(parse_node_t(symbol_job_list));
}

bool top_node_match_token(parse_token_t token);

// implementation of certain parser constructions
void accept_token(parse_token_t token);
void accept_token_statement_list(parse_token_t token);
void accept_token(parse_token_t token, const wcstring &src);
void accept_token_job_list(parse_token_t token);
void accept_token_job(parse_token_t token);
void accept_token_job_continuation(parse_token_t token);
void accept_token_statement(parse_token_t token);
void accept_token_block_header(parse_token_t token);
void accept_token_boolean_statement(parse_token_t token);

@@ -289,7 +330,7 @@ class parse_ll_t

void parse_ll_t::token_unhandled(parse_token_t token, const char *function)
{
fprintf(stderr, "Unhandled token with type %d in function %s\n", (int)token.type, function);
fprintf(stderr, "Unhandled token with type %ls in function %s\n", token_type_description(token.type).c_str(), function);
PARSER_DIE();
}

@@ -304,17 +345,33 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
fatal_errored = true;
}

void parse_ll_t::accept_token_statement_list(parse_token_t token)
void parse_ll_t::accept_token_job_list(parse_token_t token)
{
PARSE_ASSERT(stack_top_type() == symbol_statement_list);
PARSE_ASSERT(stack_top_type() == symbol_job_list);
switch (token.type)
{
case parse_token_type_string:
// 'end' is special
if (token.keyword == parse_keyword_end)
{
// End this job list
symbol_stack_pop_push();
}
else
{
// Normal string
symbol_stack_pop_push(symbol_job, symbol_job_list);
}
break;

case parse_token_type_pipe:
case parse_token_type_redirection:
case parse_token_background:
symbol_stack_pop_push(symbol_job, symbol_job_list);
break;

case parse_token_type_end:
symbol_stack_pop_push(symbol_statement, symbol_statement_list);
symbol_stack_pop_push(parse_token_type_end, symbol_job_list);
break;

case parse_token_type_terminate:

@@ -328,6 +385,30 @@ void parse_ll_t::accept_token_statement_list(parse_token_t token)
}
}

void parse_ll_t::accept_token_job(parse_token_t token)
{
PARSE_ASSERT(stack_top_type() == symbol_job);
symbol_stack_pop_push(symbol_statement, symbol_job_continuation);
}

void parse_ll_t::accept_token_job_continuation(parse_token_t token)
{
PARSE_ASSERT(stack_top_type() == symbol_job_continuation);
switch (token.type)
{
case parse_token_type_pipe:
// Pipe, continuation
symbol_stack_pop_push(parse_token_type_pipe, symbol_statement, symbol_job_continuation);
break;

default:
// Not a pipe, no job continuation
symbol_stack_pop_push();
break;
}
}


void parse_ll_t::accept_token_statement(parse_token_t token)
{
PARSE_ASSERT(stack_top_type() == symbol_statement);
@@ -341,25 +422,13 @@ void parse_ll_t::accept_token_statement(parse_token_t token)
case parse_keyword_not:
symbol_stack_pop_push(symbol_boolean_statement);
break;


case parse_keyword_if:
symbol_stack_pop_push(symbol_if_header);
break;

case parse_keyword_for:
symbol_stack_pop_push(symbol_for_header);
break;

case parse_keyword_while:
symbol_stack_pop_push(symbol_while_header);
break;

case parse_keyword_begin:
symbol_stack_pop_push(symbol_begin_header);
break;

case parse_keyword_function:
symbol_stack_pop_push(symbol_function_header);
case parse_keyword_begin:
symbol_stack_pop_push(symbol_block_statement);
break;

case parse_keyword_else:

@@ -370,7 +439,7 @@ void parse_ll_t::accept_token_statement(parse_token_t token)
break;

case parse_keyword_end:
// TODO
PARSER_DIE(); //todo
break;

// 'in' is only special within a for_header

@@ -384,11 +453,6 @@ void parse_ll_t::accept_token_statement(parse_token_t token)
}
break;

case parse_token_type_end:
// Empty line, or just a semicolon
symbol_stack_pop_push(parse_token_type_end);
break;

case parse_token_type_pipe:
case parse_token_type_redirection:
case parse_token_background:

@@ -415,7 +479,7 @@ void parse_ll_t::accept_token_block_header(parse_token_t token)
break;

case parse_keyword_else:
//todo
PARSER_DIE(); //todo
break;

case parse_keyword_for:

@@ -508,7 +572,7 @@ void parse_ll_t::accept_token_decorated_statement(parse_token_t token)
void parse_ll_t::accept_token_plain_statement(parse_token_t token)
{
PARSE_ASSERT(stack_top_type() == symbol_plain_statement);
symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list, parse_token_type_end);
symbol_stack_pop_push(parse_token_type_string, symbol_arguments_or_redirections_list);
}

void parse_ll_t::accept_token_arguments_or_redirections_list(parse_token_t token)

@@ -588,15 +652,23 @@ bool parse_ll_t::top_node_match_token(parse_token_t token)
symbol_stack.pop_back();
result = true;
}
else if (token.type == parse_token_type_pipe)
{
// Pipes are primitive
symbol_stack.pop_back();
result = true;
}
}
return result;
}

void parse_ll_t::accept_token(parse_token_t token)
void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
{
if (1)
bool logit = true;
if (logit)
{
fprintf(stderr, "Accept token of type %ls\n", token_type_description(token.type).c_str());
const wcstring txt = wcstring(src, token.source_start, token.source_length);
fprintf(stderr, "Accept token %ls\n", token.describe().c_str());
}
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
PARSE_ASSERT(! symbol_stack.empty());
@@ -605,6 +677,10 @@ void parse_ll_t::accept_token(parse_token_t token)
{
if (top_node_match_token(token))
{
if (logit)
{
fprintf(stderr, "Consumed token %ls\n", token.describe().c_str());
}
consumed = true;
break;
}

@@ -612,16 +688,24 @@ void parse_ll_t::accept_token(parse_token_t token)
switch (stack_top_type())
{
/* Symbols */
case symbol_statement_list:
accept_token_statement_list(token);
case symbol_job_list:
accept_token_job_list(token);
break;

case symbol_job:
accept_token_job(token);
break;

case symbol_job_continuation:
accept_token_job_continuation(token);
break;

case symbol_statement:
accept_token_statement(token);
break;

case symbol_block_statement:
symbol_stack_pop_push(symbol_block_header, symbol_statement_list, parse_keyword_end, symbol_arguments_or_redirections_list);
symbol_stack_pop_push(symbol_block_header, parse_token_type_end, symbol_job_list, parse_keyword_end, symbol_arguments_or_redirections_list);
break;

case symbol_block_header:

@@ -644,7 +728,7 @@ void parse_ll_t::accept_token(parse_token_t token)
break;

case symbol_function_header:
symbol_stack_pop_push(parse_keyword_function, symbol_arguments_or_redirections_list, parse_token_type_end);
symbol_stack_pop_push(parse_keyword_function, parse_token_type_string, symbol_arguments_or_redirections_list);
break;

case symbol_boolean_statement:

@@ -673,7 +757,8 @@ void parse_ll_t::accept_token(parse_token_t token)
break;

default:
fprintf(stderr, "Bailing with token type %ls\n", token_type_description(token.type).c_str());
fprintf(stderr, "Bailing with token type %ls and stack top %ls\n", token_type_description(token.type).c_str(), token_type_description(stack_top_type()).c_str());
exit_without_destructors(EXIT_FAILURE);
break;
}
}

@@ -741,7 +826,7 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_
token.source_start = (size_t)tok_start;
token.source_length = wcslen(tok_txt);
token.keyword = keyword_for_token(tok_type, tok_txt);
this->parser->accept_token(token);
this->parser->accept_token(token, str);
}

wcstring result = dump_tree(this->parser->nodes, str);
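As a rough illustration of the new job-oriented handlers (not part of the diff), an input such as `echo hi | cat` would expand the symbol stack roughly like this, assuming the usual tokenization into string, string, pipe, string, terminate:

    job_list            (string "echo")   -> job job_list
    job                 (string "echo")   -> statement job_continuation
    statement           (string "echo")   -> decorated_statement -> plain_statement
    plain_statement     (string "echo")   -> string arguments_or_redirections_list
    ... "echo" and "hi" are consumed as string / argument tokens ...
    job_continuation    (pipe)            -> pipe statement job_continuation
    ... "cat" is parsed the same way ...
    job_continuation    (terminate)       -> <empty>

Each arrow corresponds to a symbol_stack_pop_push call in the accept_token_* functions above.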
parse_tree.h (43 changes)
@@ -20,6 +20,7 @@
class parse_node_t;
typedef std::vector<parse_node_t> parse_node_tree_t;
typedef size_t node_offset_t;
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))

struct parse_error_t
{

@@ -51,7 +52,9 @@ enum parse_token_type_t
token_type_invalid,

// Non-terminal tokens
symbol_statement_list,
symbol_job_list,
symbol_job,
symbol_job_continuation,
symbol_statement,
symbol_block_statement,
symbol_block_header,

@@ -96,6 +99,9 @@ enum parse_keyword_t
parse_keyword_builtin
};

wcstring token_type_description(parse_token_type_t type);
wcstring keyword_description(parse_keyword_t type);

/** Base class for nodes of a parse tree */
class parse_node_t
{
@@ -125,31 +131,46 @@ class parse_node_t
explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0)
{
}

node_offset_t child_offset(node_offset_t which) const
{
PARSE_ASSERT(which < child_count);
return child_start + which;
}
};



/* Fish grammar:

# A statement_list is a list of statements, separated by semicolons or newlines
# A job_list is a list of jobs, separated by semicolons or newlines

statement_list = <empty> |
                 statement statement_list
job_list = <empty> |
           <TOK_END> job_list |
           job job_list

# A statement is a normal job, or an if / while / and etc, or just a nothing (i.e. newline)
# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation

statement = boolean_statement | block_statement | decorated_statement | <TOK_END>
job = statement job_continuation
job_continuation = <empty> |
                   <TOK_PIPE> statement job_continuation

# A statement is a normal command, or an if / while / and etc

statement = boolean_statement | block_statement | decorated_statement

# A block is a conditional, loop, or begin/end

block_statement = block_header statement_list END arguments_or_redirections_list
block_statement = block_header STATEMENT_TERMINATOR job_list <END> arguments_or_redirections_list
block_header = if_header | for_header | while_header | function_header | begin_header
if_header = IF statement
for_header = FOR var_name IN arguments_or_redirections_list STATEMENT_TERMINATOR
for_header = FOR var_name IN arguments_or_redirections_list
while_header = WHILE statement
begin_header = BEGIN STATEMENT_TERMINATOR
function_header = FUNCTION arguments_or_redirections_list STATEMENT_TERMINATOR
begin_header = BEGIN
function_header = FUNCTION function_name arguments_or_redirections_list

#(TODO: functions should not support taking redirections in their arguments)

# A boolean statement is AND or OR or NOT

boolean_statement = AND statement | OR statement | NOT statement
@@ -157,7 +178,7 @@ class parse_node_t
# A decorated_statement is a command with a list of arguments_or_redirections, possibly with "builtin" or "command"

decorated_statement = COMMAND plain_statement | BUILTIN plain_statement | plain_statement
plain_statement = command arguments_or_redirections_list terminator
plain_statement = command arguments_or_redirections_list

arguments_or_redirections_list = <empty> |
                                 argument_or_redirection arguments_or_redirections_list
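To illustrate the revised grammar (an editor's example, not part of the diff), a block like `begin; echo hi; end` would derive roughly as:

    statement -> block_statement
              -> block_header STATEMENT_TERMINATOR job_list <END> arguments_or_redirections_list
    block_header -> begin_header -> BEGIN            ("begin")
    STATEMENT_TERMINATOR                             (";")
    job_list -> job job_list -> ... -> <empty>       ("echo hi" and its terminator, ended by "end")
    <END>                                            ("end")
    arguments_or_redirections_list -> <empty>

which lines up with the five children that run_top_node asserts for symbol_block_statement in parse_exec.cpp.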