// Patch overview (free-form preamble placed before the first "diff --git" header).
// NOTE: the text below is a unified diff that has been collapsed and re-wrapped; the
// physical line breaks are wrap points, not diff line boundaries.
//
// builtin.cpp
//   - Includes parse_tree.h and parse_exec.h.
//   - Adds builtin_parse(): reads builtin_stdin through read_loop() in 256-byte chunks
//     until read_loop() returns <= 0; if any bytes were read, converts them with
//     str2wcstring(), parses them into a parse_node_tree_t via parse_t::parse(), builds a
//     parse_execution_context_t, and appends "Simulating execution:", the result of
//     ctx.simulate(), and a newline to stdout_buffer. Always returns STATUS_BUILTIN_OK.
//   - Registers the builtin as L"parse" in builtin_datas and drops a trailing blank line.
// fish_tests.cpp
//   - test_new_parser() now parses L"echo hello world" into a tree and prints the
//     simulation with printf("%ls\n", ...).
// parse_exec.cpp
//   - Adds sanity_id_t (uint16_t) and next_sanity_id(), which pre-increments a
//     function-local static counter.
//   - exec_node_t gains command_sanity_id and loses its virtual destructor;
//     exec_argument_t and exec_redirection_t no longer derive from exec_node_t.
//   - exec_statement_t becomes exec_basic_statement_t (command_idx, sanity_id,
//     set_decoration()).
//   - parse_exec_t gains the simulating flag, simulation_result, assembling_statements and
//     the helpers get_node_string, pop_push(child_idx, child_count), pop, pop_push_all,
//     assemble_command, assemble_argument_or_redirection, assembly_complete,
//     simulate_statement and create_basic_statement; its constructor moves under "public:".
//   - run_top_node() handles symbol_decorated_statement, symbol_plain_statement,
//     symbol_arguments_or_redirections_list, symbol_argument_or_redirection and
//     parse_token_type_end, with an optional debug dump guarded by a local "log" flag
//     that is hard-coded to false.
//   - parse_exec_t::simulate() returns L"(empty!)" for an empty tree; otherwise it enters
//     node 0, calls run_top_node() until exec_nodes is empty, and joins simulation_result
//     with L"\n".
// parse_tree.cpp / parse_tree.h
//   - parse_t::parse() takes a parse_node_tree_t *output; when it is non-NULL the parser's
//     nodes are swapped into it and the parser's vector is cleared.
//   - The stderr summary now prints node count, node size and total bytes.
//   - The parse_node_t forward declaration and the parse_node_tree_t / node_offset_t
//     typedefs move above class parse_t; the parse_sr_t forward declaration is removed.
//
// NOTE(review): text inside angle brackets looks stripped throughout (bare "#include",
//   "std::vector txt;", "std::vector arguments;", "typedef std::vector parse_node_tree_t;",
//   and both decoration cases appending the same L" ") - presumably an extraction loss;
//   confirm against the original commit before applying.
// NOTE(review): builtin_parse() declares a local "parse_t parser" inside the if-block that
//   shadows the "parser_t &parser" parameter; it is also not declared static, unlike the
//   builtin_history() shown in the hunk context.
// NOTE(review): exec_node_t's constructor leaves command_sanity_id unset; pop_push()
//   assigns it only after push_back().
// NOTE(review): exec_basic_statement_t::sanity_id is spelled uint16_t rather than
//   sanity_id_t. pop_push() uses 0 to mean "no statement being assembled", and the 16-bit
//   counter in next_sanity_id() yields 0 again after wrapping.
// NOTE(review): pop_push_all() calls exec_nodes.back() without the non-empty PARSE_ASSERT
//   that pop_push() and pop() perform.
// NOTE(review): the parse_token_type_redirection case in assemble_argument_or_redirection()
//   records nothing, so redirections never reach simulate_statement().
// NOTE(review): this patch removes the "wcstring parse_execution_context_t::simulate()"
//   definition line while builtin.cpp and fish_tests.cpp call ctx.simulate() on a
//   parse_execution_context_t - verify a definition still exists elsewhere.
// NOTE(review): in parse_t::parse() the unchanged fprintf passes nodes.size() to "%ld" and
//   the new one passes size_t values to "%lu"; confirm the intended format for size_t.
diff --git a/builtin.cpp b/builtin.cpp index 9796d356f..12b331b82 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -64,6 +64,8 @@ #include "expand.h" #include "path.h" #include "history.h" +#include "parse_tree.h" +#include "parse_exec.h" /** The default prompt for the read command @@ -3938,6 +3940,30 @@ static int builtin_history(parser_t &parser, wchar_t **argv) return STATUS_BUILTIN_ERROR; } +int builtin_parse(parser_t &parser, wchar_t **argv) +{ + std::vector txt; + for (;;) + { + char buff[256]; + ssize_t amt = read_loop(builtin_stdin, buff, sizeof buff); + if (amt <= 0) break; + txt.insert(txt.end(), buff, buff + amt); + } + if (! txt.empty()) + { + const wcstring src = str2wcstring(&txt.at(0), txt.size()); + parse_node_tree_t parse_tree; + parse_t parser; + parser.parse(src, &parse_tree); + parse_execution_context_t ctx(parse_tree, src); + stdout_buffer.append(L"Simulating execution:"); + wcstring simulation = ctx.simulate(); + stdout_buffer.append(simulation); + stdout_buffer.push_back(L'\n'); + } + return STATUS_BUILTIN_OK; +} /* END OF BUILTIN COMMANDS @@ -3985,6 +4011,7 @@ static const builtin_data_t builtin_datas[]= { L"jobs", &builtin_jobs, N_(L"Print currently running jobs") }, { L"not", &builtin_generic, N_(L"Negate exit status of job") }, { L"or", &builtin_generic, N_(L"Execute command if previous command failed") }, + { L"parse", &builtin_parse, N_(L"Try out the new parser") }, { L"printf", &builtin_printf, N_(L"Prints formatted text") }, { L"pwd", &builtin_pwd, N_(L"Print the working directory") }, { L"random", &builtin_random, N_(L"Generate random number") }, @@ -4144,4 +4171,3 @@ void builtin_pop_io(parser_t &parser) builtin_stdin = 0; } } - diff --git a/fish_tests.cpp b/fish_tests.cpp index 35c1e3277..20d79288b 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -1722,8 +1722,16 @@ void history_tests_t::test_history_speed(void) static void test_new_parser(void) { say(L"Testing new parser!"); + const wcstring src = L"echo hello world"; + 
parse_node_tree_t parse_tree; parse_t parser; - parser.parse(L"echo hello"); + parser.parse(src, &parse_tree); + parse_execution_context_t ctx(parse_tree, src); + say(L"Simulating execution:"); + wcstring simulation = ctx.simulate(); + printf("%ls\n", simulation.c_str()); + + } /** diff --git a/parse_exec.cpp b/parse_exec.cpp index 30503857e..04601e5ba 100644 --- a/parse_exec.cpp +++ b/parse_exec.cpp @@ -1,34 +1,41 @@ #include "parse_exec.h" #include +typedef uint16_t sanity_id_t; +static sanity_id_t next_sanity_id() +{ + static sanity_id_t last_sanity_id; + return ++last_sanity_id; +} struct exec_node_t { node_offset_t parse_node_idx; + sanity_id_t command_sanity_id; exec_node_t(size_t pni) : parse_node_idx(pni) { } - virtual ~exec_node_t(); }; -exec_node_t::~exec_node_t() +struct exec_argument_t { -} + node_offset_t parse_node_idx; + sanity_id_t command_sanity_id; +}; -struct exec_redirection_t : public exec_node_t +struct exec_redirection_t { }; -struct exec_argument_t : public exec_node_t +struct exec_basic_statement_t { + // Node containing the command + node_offset_t command_idx; -}; - -struct exec_statement_t -{ + // Decoration enum { decoration_plain, @@ -38,68 +45,260 @@ struct exec_statement_t std::vector arguments; std::vector redirections; + uint16_t sanity_id; + + exec_basic_statement_t() : command_idx(0), decoration(decoration_plain) + { + sanity_id = next_sanity_id(); + } + + void set_decoration(uint32_t k) + { + PARSE_ASSERT(k == parse_keyword_none || k == parse_keyword_command || k == parse_keyword_builtin); + switch (k) + { + case parse_keyword_none: + decoration = decoration_plain; + break; + case parse_keyword_command: + decoration = decoration_command; + break; + case parse_keyword_builtin: + decoration = decoration_builtin; + break; + default: + PARSER_DIE(); + break; + } + + } }; class parse_exec_t { parse_node_tree_t parse_tree; wcstring src; + + bool simulating; + wcstring_list_t simulation_result; + + /* The stack of nodes as we execute 
them */ std::vector exec_nodes; - parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s) + /* The stack of commands being built */ + std::vector assembling_statements; + + void get_node_string(node_offset_t idx, wcstring *output) const { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(node.source_start <= src.size()); + PARSE_ASSERT(node.source_start + node.source_length <= src.size()); + output->assign(src, node.source_start, node.source_length); + } + + void pop_push(node_offset_t child_idx, node_offset_t child_count = 1) + { + PARSE_ASSERT(! exec_nodes.empty()); + if (child_count == 0) + { + // No children, just remove the top node + exec_nodes.pop_back(); + } + else + { + // Figure out the offset of the children + exec_node_t &top = exec_nodes.back(); + const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + PARSE_ASSERT(child_idx < parse_node.child_count); + node_offset_t child_node_idx = parse_node.child_start + child_idx; + + // Remove the top node + exec_nodes.pop_back(); + + // Append the given children, backwards + sanity_id_t command_sanity_id = assembling_statements.empty() ? 0 : assembling_statements.back().sanity_id; + node_offset_t cursor = child_count; + while (cursor--) + { + exec_nodes.push_back(child_node_idx + cursor); + exec_nodes.back().command_sanity_id = command_sanity_id; + } + } } - void pop_push(uint32_t child_idx) + void pop() + { + PARSE_ASSERT(! exec_nodes.empty()); + exec_nodes.pop_back(); + } + + void pop_push_all() { exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); - PARSE_ASSERT(child_idx < parse_node.child_count); - node_offset_t child_node_idx = parse_node.child_start + child_idx; - exec_nodes.pop_back(); - exec_nodes.push_back(child_node_idx); + pop_push(0, parse_node.child_count); + } + + void assemble_command(node_offset_t idx) + { + // Set the command for our top basic statement + PARSE_ASSERT(! 
assembling_statements.empty()); + assembling_statements.back().command_idx = idx; + } + + void assemble_argument_or_redirection(node_offset_t idx) + { + const parse_node_t &node = parse_tree.at(idx); + PARSE_ASSERT(! assembling_statements.empty()); + exec_basic_statement_t &statement = assembling_statements.back(); + switch (node.type) + { + case parse_token_type_string: + // Argument + { + exec_argument_t arg = exec_argument_t(); + arg.parse_node_idx = idx; + arg.command_sanity_id = statement.sanity_id; + statement.arguments.push_back(arg); + } + break; + + case parse_token_type_redirection: + // Redirection + break; + + default: + PARSER_DIE(); + break; + } } - void simulate(void); + void assembly_complete() + { + // Finished building a command + PARSE_ASSERT(! assembling_statements.empty()); + const exec_basic_statement_t &statement = assembling_statements.back(); + + if (simulating) + { + simulate_statement(statement); + } + assembling_statements.pop_back(); + } + + void simulate_statement(const exec_basic_statement_t &statement) + { + PARSE_ASSERT(simulating); + wcstring line; + switch (statement.decoration) + { + case exec_basic_statement_t::decoration_builtin: + line.append(L" "); + break; + + case exec_basic_statement_t::decoration_command: + line.append(L" "); + break; + + default: + break; + } + + wcstring tmp; + get_node_string(statement.command_idx, &tmp); + line.append(L"cmd:"); + line.append(tmp); + for (size_t i=0; i < statement.arguments.size(); i++) + { + const exec_argument_t &arg = statement.arguments.at(i); + get_node_string(arg.parse_node_idx, &tmp); + line.append(L" "); + line.append(L"arg:"); + line.append(tmp); + } + simulation_result.push_back(line); + } + void enter_parse_node(size_t idx); void run_top_node(void); + exec_basic_statement_t *create_basic_statement(void); + + public: + parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s), simulating(false) + { + } + wcstring simulate(void); }; 
+exec_basic_statement_t *parse_exec_t::create_basic_statement() +{ + assembling_statements.push_back(exec_basic_statement_t()); + return &assembling_statements.back(); +} + void parse_exec_t::run_top_node() { PARSE_ASSERT(! exec_nodes.empty()); exec_node_t &top = exec_nodes.back(); const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx); + bool log = false; + + if (log) + { + wcstring tmp; + tmp.append(exec_nodes.size(), L' '); + tmp.append(parse_node.describe()); + printf("%ls\n", tmp.c_str()); + } switch (parse_node.type) { case symbol_statement_list: PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); - if (parse_node.child_count == 0) - { - // Statement list done - exec_nodes.pop_back(); - } - else - { - // First child is a statement, next is the rest of the list - node_offset_t head = parse_node.child_start; - node_offset_t tail = parse_node.child_start + 1; - exec_nodes.pop_back(); - exec_nodes.push_back(tail); - exec_nodes.push_back(head); - } + pop_push_all(); break; case symbol_statement: PARSE_ASSERT(parse_node.child_count == 1); - pop_push(0); + pop_push_all(); break; - case decorated_statement: + case symbol_decorated_statement: + { PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 ); - pop_push(0); + exec_basic_statement_t *cmd = create_basic_statement(); + cmd->set_decoration(parse_node.tag); + + // Push the last node (skip any decoration) + pop_push(parse_node.child_count - 1, 1); + break; + } + + case symbol_plain_statement: + PARSE_ASSERT(parse_node.child_count == 3); + // Extract the command + PARSE_ASSERT(! 
assembling_statements.empty()); + assemble_command(parse_node.child_start + 0); + // Jump to statement list, then terminator + pop_push(1, 2); + break; + + case symbol_arguments_or_redirections_list: + PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2); + pop_push_all(); + break; + + case symbol_argument_or_redirection: + PARSE_ASSERT(parse_node.child_count == 1); + assemble_argument_or_redirection(parse_node.child_start + 0); + pop(); + break; + + case parse_token_type_end: + PARSE_ASSERT(parse_node.child_count == 0); + assembly_complete(); + pop(); break; default: @@ -118,23 +317,27 @@ void parse_exec_t::enter_parse_node(size_t idx) } wcstring parse_exec_t::simulate(void) -{ - PARSE_ASSERT(exec_nodes.empty()); - assemble_statement_list(0); - enter_parse_node(0); - run_node(); -} - -wcstring parse_execution_context_t::simulate() { if (parse_tree.empty()) return L"(empty!)"; - PARSE_ASSERT(node_idx < nodes.size()); - PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list); + PARSE_ASSERT(exec_nodes.empty()); + simulating = true; + + enter_parse_node(0); + while (! 
exec_nodes.empty()) + { + run_top_node(); + } wcstring result; + for (size_t i=0; i < simulation_result.size(); i++) + { + result.append(simulation_result.at(i)); + result.append(L"\n"); + } + return result; } parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s) diff --git a/parse_tree.cpp b/parse_tree.cpp index 8c38ff0cd..a58b0ff80 100644 --- a/parse_tree.cpp +++ b/parse_tree.cpp @@ -634,7 +634,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt) return result; } -void parse_t::parse(const wcstring &str) +void parse_t::parse(const wcstring &str, parse_node_tree_t *output) { tokenizer_t tok = tokenizer_t(str.c_str(), 0); for (; tok_has_next(&tok); tok_next(&tok)) @@ -658,5 +658,11 @@ void parse_t::parse(const wcstring &str) } wcstring result = dump_tree(this->parser->nodes, str); fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str()); - fprintf(stderr, "node size %ld\n", sizeof(parse_node_t)); + fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); + + if (output != NULL) + { + output->swap(this->parser->nodes); + this->parser->nodes.clear(); + } } diff --git a/parse_tree.h b/parse_tree.h index 892c36cdd..0b63efa82 100644 --- a/parse_tree.h +++ b/parse_tree.h @@ -17,22 +17,21 @@ #define PARSE_ASSERT(a) assert(a) #define PARSER_DIE() assert(0) +class parse_node_t; +typedef std::vector parse_node_tree_t; +typedef size_t node_offset_t; + class parse_ll_t; -class parse_sr_t; class parse_t { parse_ll_t * const parser; public: parse_t(); - void parse(const wcstring &str); + void parse(const wcstring &str, parse_node_tree_t *output); }; -class parse_node_t; -typedef std::vector parse_node_tree_t; -typedef size_t node_offset_t; - enum parse_token_type_t {