Added parse builtin for testing. Lots of work on simulated execution.

This commit is contained in:
ridiculousfish 2013-06-15 14:32:38 -07:00
parent e2a506e54a
commit 827a9d640c
5 changed files with 295 additions and 53 deletions

View file

@ -64,6 +64,8 @@
#include "expand.h" #include "expand.h"
#include "path.h" #include "path.h"
#include "history.h" #include "history.h"
#include "parse_tree.h"
#include "parse_exec.h"
/** /**
The default prompt for the read command The default prompt for the read command
@ -3938,6 +3940,30 @@ static int builtin_history(parser_t &parser, wchar_t **argv)
return STATUS_BUILTIN_ERROR; return STATUS_BUILTIN_ERROR;
} }
/**
   The parse builtin. Reads all of builtin_stdin, runs the text through the
   new parser (parse_t), and appends the result of simulating its execution
   to stdout_buffer. Intended for trying out the new parser.

   \param parser the calling parser; not used here
   \param argv the argument list; not used here
   \return always STATUS_BUILTIN_OK
*/
int builtin_parse(parser_t &parser, wchar_t **argv)
{
    /* Slurp stdin in fixed-size chunks */
    std::vector<char> txt;
    for (;;)
    {
        char buff[256];
        ssize_t amt = read_loop(builtin_stdin, buff, sizeof buff);
        /* Presumably 0 is end of input and a negative value is a read error; either way stop reading */
        if (amt <= 0) break;
        txt.insert(txt.end(), buff, buff + amt);
    }

    /* Nothing is printed for empty input */
    if (! txt.empty())
    {
        const wcstring src = str2wcstring(&txt.at(0), txt.size());
        parse_node_tree_t parse_tree;

        /* Named so that it does not shadow the parser_t parameter */
        parse_t new_parser;
        new_parser.parse(src, &parse_tree);

        parse_execution_context_t ctx(parse_tree, src);

        /* Terminate the header line so the first simulated statement starts on its own line */
        stdout_buffer.append(L"Simulating execution:\n");
        wcstring simulation = ctx.simulate();
        stdout_buffer.append(simulation);
        stdout_buffer.push_back(L'\n');
    }
    return STATUS_BUILTIN_OK;
}
/* /*
END OF BUILTIN COMMANDS END OF BUILTIN COMMANDS
@ -3985,6 +4011,7 @@ static const builtin_data_t builtin_datas[]=
{ L"jobs", &builtin_jobs, N_(L"Print currently running jobs") }, { L"jobs", &builtin_jobs, N_(L"Print currently running jobs") },
{ L"not", &builtin_generic, N_(L"Negate exit status of job") }, { L"not", &builtin_generic, N_(L"Negate exit status of job") },
{ L"or", &builtin_generic, N_(L"Execute command if previous command failed") }, { L"or", &builtin_generic, N_(L"Execute command if previous command failed") },
{ L"parse", &builtin_parse, N_(L"Try out the new parser") },
{ L"printf", &builtin_printf, N_(L"Prints formatted text") }, { L"printf", &builtin_printf, N_(L"Prints formatted text") },
{ L"pwd", &builtin_pwd, N_(L"Print the working directory") }, { L"pwd", &builtin_pwd, N_(L"Print the working directory") },
{ L"random", &builtin_random, N_(L"Generate random number") }, { L"random", &builtin_random, N_(L"Generate random number") },
@ -4144,4 +4171,3 @@ void builtin_pop_io(parser_t &parser)
builtin_stdin = 0; builtin_stdin = 0;
} }
} }

View file

@ -1722,8 +1722,16 @@ void history_tests_t::test_history_speed(void)
static void test_new_parser(void) static void test_new_parser(void)
{ {
say(L"Testing new parser!"); say(L"Testing new parser!");
const wcstring src = L"echo hello world";
parse_node_tree_t parse_tree;
parse_t parser; parse_t parser;
parser.parse(L"echo hello"); parser.parse(src, &parse_tree);
parse_execution_context_t ctx(parse_tree, src);
say(L"Simulating execution:");
wcstring simulation = ctx.simulate();
printf("%ls\n", simulation.c_str());
} }
/** /**

View file

@ -1,34 +1,41 @@
#include "parse_exec.h" #include "parse_exec.h"
#include <stack> #include <stack>
/* Identifier used to tie exec nodes and arguments to the statement being assembled */
typedef uint16_t sanity_id_t;

/*
   Returns the next sanity id. Ids count up from 1. The value 0 is never
   returned, because pop_push assigns 0 as the command sanity id when no
   statement is being assembled; when the 16-bit counter wraps around, it
   restarts at 1 instead of 0.
*/
static sanity_id_t next_sanity_id()
{
    static sanity_id_t last_sanity_id;
    ++last_sanity_id;
    if (last_sanity_id == 0)
    {
        /* Wrapped around: skip the reserved "no statement" value */
        last_sanity_id = 1;
    }
    return last_sanity_id;
}
struct exec_node_t struct exec_node_t
{ {
node_offset_t parse_node_idx; node_offset_t parse_node_idx;
sanity_id_t command_sanity_id;
exec_node_t(size_t pni) : parse_node_idx(pni) exec_node_t(size_t pni) : parse_node_idx(pni)
{ {
} }
virtual ~exec_node_t();
}; };
exec_node_t::~exec_node_t() struct exec_argument_t
{ {
} node_offset_t parse_node_idx;
sanity_id_t command_sanity_id;
};
struct exec_redirection_t : public exec_node_t struct exec_redirection_t
{ {
}; };
struct exec_argument_t : public exec_node_t struct exec_basic_statement_t
{ {
// Node containing the command
node_offset_t command_idx;
}; // Decoration
struct exec_statement_t
{
enum enum
{ {
decoration_plain, decoration_plain,
@ -38,68 +45,260 @@ struct exec_statement_t
std::vector<exec_argument_t> arguments; std::vector<exec_argument_t> arguments;
std::vector<exec_redirection_t> redirections; std::vector<exec_redirection_t> redirections;
uint16_t sanity_id;
exec_basic_statement_t() : command_idx(0), decoration(decoration_plain)
{
sanity_id = next_sanity_id();
}
/*
   Sets this statement's decoration from a parse keyword. Only
   parse_keyword_none, parse_keyword_command and parse_keyword_builtin are
   accepted; any other value trips the parser assertions.
*/
void set_decoration(uint32_t k)
{
    PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin);

    if (k == parse_keyword_none)
    {
        // No decoration keyword
        decoration = decoration_plain;
    }
    else if (k == parse_keyword_command)
    {
        decoration = decoration_command;
    }
    else if (k == parse_keyword_builtin)
    {
        decoration = decoration_builtin;
    }
    else
    {
        // Not a decoration keyword
        PARSER_DIE();
    }
}
}; };
class parse_exec_t class parse_exec_t
{ {
parse_node_tree_t parse_tree; parse_node_tree_t parse_tree;
wcstring src; wcstring src;
bool simulating;
wcstring_list_t simulation_result;
/* The stack of nodes as we execute them */
std::vector<exec_node_t> exec_nodes; std::vector<exec_node_t> exec_nodes;
parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s) /* The stack of commands being built */
std::vector<exec_basic_statement_t> assembling_statements;
void get_node_string(node_offset_t idx, wcstring *output) const
{ {
const parse_node_t &node = parse_tree.at(idx);
PARSE_ASSERT(node.source_start <= src.size());
PARSE_ASSERT(node.source_start + node.source_length <= src.size());
output->assign(src, node.source_start, node.source_length);
}
void pop_push(node_offset_t child_idx, node_offset_t child_count = 1)
{
PARSE_ASSERT(! exec_nodes.empty());
if (child_count == 0)
{
// No children, just remove the top node
exec_nodes.pop_back();
}
else
{
// Figure out the offset of the children
exec_node_t &top = exec_nodes.back();
const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx);
PARSE_ASSERT(child_idx < parse_node.child_count);
node_offset_t child_node_idx = parse_node.child_start + child_idx;
// Remove the top node
exec_nodes.pop_back();
// Append the given children, backwards
sanity_id_t command_sanity_id = assembling_statements.empty() ? 0 : assembling_statements.back().sanity_id;
node_offset_t cursor = child_count;
while (cursor--)
{
exec_nodes.push_back(child_node_idx + cursor);
exec_nodes.back().command_sanity_id = command_sanity_id;
}
}
} }
void pop_push(uint32_t child_idx) void pop()
{
PARSE_ASSERT(! exec_nodes.empty());
exec_nodes.pop_back();
}
void pop_push_all()
{ {
exec_node_t &top = exec_nodes.back(); exec_node_t &top = exec_nodes.back();
const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx);
PARSE_ASSERT(child_idx < parse_node.child_count); pop_push(0, parse_node.child_count);
node_offset_t child_node_idx = parse_node.child_start + child_idx; }
exec_nodes.pop_back();
exec_nodes.push_back(child_node_idx); void assemble_command(node_offset_t idx)
{
// Set the command for our top basic statement
PARSE_ASSERT(! assembling_statements.empty());
assembling_statements.back().command_idx = idx;
}
void assemble_argument_or_redirection(node_offset_t idx)
{
const parse_node_t &node = parse_tree.at(idx);
PARSE_ASSERT(! assembling_statements.empty());
exec_basic_statement_t &statement = assembling_statements.back();
switch (node.type)
{
case parse_token_type_string:
// Argument
{
exec_argument_t arg = exec_argument_t();
arg.parse_node_idx = idx;
arg.command_sanity_id = statement.sanity_id;
statement.arguments.push_back(arg);
}
break;
case parse_token_type_redirection:
// Redirection
break;
default:
PARSER_DIE();
break;
}
} }
void simulate(void); void assembly_complete()
{
// Finished building a command
PARSE_ASSERT(! assembling_statements.empty());
const exec_basic_statement_t &statement = assembling_statements.back();
if (simulating)
{
simulate_statement(statement);
}
assembling_statements.pop_back();
}
void simulate_statement(const exec_basic_statement_t &statement)
{
PARSE_ASSERT(simulating);
wcstring line;
switch (statement.decoration)
{
case exec_basic_statement_t::decoration_builtin:
line.append(L"<builtin> ");
break;
case exec_basic_statement_t::decoration_command:
line.append(L"<command> ");
break;
default:
break;
}
wcstring tmp;
get_node_string(statement.command_idx, &tmp);
line.append(L"cmd:");
line.append(tmp);
for (size_t i=0; i < statement.arguments.size(); i++)
{
const exec_argument_t &arg = statement.arguments.at(i);
get_node_string(arg.parse_node_idx, &tmp);
line.append(L" ");
line.append(L"arg:");
line.append(tmp);
}
simulation_result.push_back(line);
}
void enter_parse_node(size_t idx); void enter_parse_node(size_t idx);
void run_top_node(void); void run_top_node(void);
exec_basic_statement_t *create_basic_statement(void);
public:
parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false)
{
}
wcstring simulate(void);
}; };
/*
   Pushes a new, default-constructed statement onto the stack of statements
   being assembled and returns a pointer to it. The pointer refers to an
   element of assembling_statements.
*/
exec_basic_statement_t *parse_exec_t::create_basic_statement()
{
    const exec_basic_statement_t fresh;
    assembling_statements.push_back(fresh);
    exec_basic_statement_t &newest = assembling_statements.back();
    return &newest;
}
void parse_exec_t::run_top_node() void parse_exec_t::run_top_node()
{ {
PARSE_ASSERT(! exec_nodes.empty()); PARSE_ASSERT(! exec_nodes.empty());
exec_node_t &top = exec_nodes.back(); exec_node_t &top = exec_nodes.back();
const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx);
bool log = false;
if (log)
{
wcstring tmp;
tmp.append(exec_nodes.size(), L' ');
tmp.append(parse_node.describe());
printf("%ls\n", tmp.c_str());
}
switch (parse_node.type) switch (parse_node.type)
{ {
case symbol_statement_list: case symbol_statement_list:
PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2);
if (parse_node.child_count == 0) pop_push_all();
{
// Statement list done
exec_nodes.pop_back();
}
else
{
// First child is a statement, next is the rest of the list
node_offset_t head = parse_node.child_start;
node_offset_t tail = parse_node.child_start + 1;
exec_nodes.pop_back();
exec_nodes.push_back(tail);
exec_nodes.push_back(head);
}
break; break;
case symbol_statement: case symbol_statement:
PARSE_ASSERT(parse_node.child_count == 1); PARSE_ASSERT(parse_node.child_count == 1);
pop_push(0); pop_push_all();
break; break;
case decorated_statement: case symbol_decorated_statement:
{
PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 ); PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 );
pop_push(0); exec_basic_statement_t *cmd = create_basic_statement();
cmd->set_decoration(parse_node.tag);
// Push the last node (skip any decoration)
pop_push(parse_node.child_count - 1, 1);
break;
}
case symbol_plain_statement:
PARSE_ASSERT(parse_node.child_count == 3);
// Extract the command
PARSE_ASSERT(! assembling_statements.empty());
assemble_command(parse_node.child_start + 0);
// Jump to statement list, then terminator
pop_push(1, 2);
break;
case symbol_arguments_or_redirections_list:
PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2);
pop_push_all();
break;
case symbol_argument_or_redirection:
PARSE_ASSERT(parse_node.child_count == 1);
assemble_argument_or_redirection(parse_node.child_start + 0);
pop();
break;
case parse_token_type_end:
PARSE_ASSERT(parse_node.child_count == 0);
assembly_complete();
pop();
break; break;
default: default:
@ -118,23 +317,27 @@ void parse_exec_t::enter_parse_node(size_t idx)
} }
wcstring parse_exec_t::simulate(void) wcstring parse_exec_t::simulate(void)
{
PARSE_ASSERT(exec_nodes.empty());
assemble_statement_list(0);
enter_parse_node(0);
run_node();
}
wcstring parse_execution_context_t::simulate()
{ {
if (parse_tree.empty()) if (parse_tree.empty())
return L"(empty!)"; return L"(empty!)";
PARSE_ASSERT(node_idx < nodes.size()); PARSE_ASSERT(exec_nodes.empty());
PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); simulating = true;
enter_parse_node(0);
while (! exec_nodes.empty())
{
run_top_node();
}
wcstring result; wcstring result;
for (size_t i=0; i < simulation_result.size(); i++)
{
result.append(simulation_result.at(i));
result.append(L"\n");
}
return result;
} }
parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s)

View file

@ -634,7 +634,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
return result; return result;
} }
void parse_t::parse(const wcstring &str) void parse_t::parse(const wcstring &str, parse_node_tree_t *output)
{ {
tokenizer_t tok = tokenizer_t(str.c_str(), 0); tokenizer_t tok = tokenizer_t(str.c_str(), 0);
for (; tok_has_next(&tok); tok_next(&tok)) for (; tok_has_next(&tok); tok_next(&tok))
@ -658,5 +658,11 @@ void parse_t::parse(const wcstring &str)
} }
wcstring result = dump_tree(this->parser->nodes, str); wcstring result = dump_tree(this->parser->nodes, str);
fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str());
fprintf(stderr, "node size %ld\n", sizeof(parse_node_t)); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));
if (output != NULL)
{
output->swap(this->parser->nodes);
this->parser->nodes.clear();
}
} }

View file

@ -17,22 +17,21 @@
#define PARSE_ASSERT(a) assert(a) #define PARSE_ASSERT(a) assert(a)
#define PARSER_DIE() assert(0) #define PARSER_DIE() assert(0)
class parse_node_t;
typedef std::vector<parse_node_t> parse_node_tree_t;
typedef size_t node_offset_t;
class parse_ll_t; class parse_ll_t;
class parse_sr_t;
class parse_t class parse_t
{ {
parse_ll_t * const parser; parse_ll_t * const parser;
public: public:
parse_t(); parse_t();
void parse(const wcstring &str); void parse(const wcstring &str, parse_node_tree_t *output);
}; };
class parse_node_t;
typedef std::vector<parse_node_t> parse_node_tree_t;
typedef size_t node_offset_t;
enum parse_token_type_t enum parse_token_type_t
{ {