More work on new parser

This commit is contained in:
ridiculousfish 2013-06-11 09:37:51 -07:00
parent 048f08080d
commit e2a506e54a
5 changed files with 257 additions and 118 deletions

View file

@ -76,6 +76,7 @@
D0A564FE168D23D800AF6161 /* man in CopyFiles */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; };
D0A56501168D258300AF6161 /* man in Copy Files */ = {isa = PBXBuildFile; fileRef = D0A564F1168D0BAB00AF6161 /* man */; };
D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F351765284C00BFAB82 /* parse_tree.cpp */; };
D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0C52F331765281F00BFAB82 /* parse_exec.cpp */; };
D0CBD587159EF0E10024809C /* launch_fish.scpt in Resources */ = {isa = PBXBuildFile; fileRef = D0CBD586159EF0E10024809C /* launch_fish.scpt */; };
D0D02A67159837AD008E62BD /* complete.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853713B3ACEE0099B651 /* complete.cpp */; };
D0D02A69159837B2008E62BD /* env.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D0A0853A13B3ACEE0099B651 /* env.cpp */; };
@ -1112,6 +1113,7 @@
D0D02A7B15983928008E62BD /* env_universal_common.cpp in Sources */,
D0D02A89159839DF008E62BD /* fish.cpp in Sources */,
D0C52F371765284C00BFAB82 /* parse_tree.cpp in Sources */,
D0C52F381765720600BFAB82 /* parse_exec.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View file

@ -1 +1,148 @@
#include "parse_exec.h"
#include <stack>
/**
 * One entry on the execution stack. It records which node of the parse tree
 * the entry refers to, as an index into the tree's node vector.
 */
struct exec_node_t
{
    /* Index of the associated node within the parse tree */
    node_offset_t parse_node_idx;

    /* Builds an entry that refers to the parse node at index pni */
    exec_node_t(size_t pni)
        : parse_node_idx(pni)
    {}

    /* Virtual so that derived entry types are destroyed correctly */
    virtual ~exec_node_t();
};

/* Defined out of line; there is nothing to release */
exec_node_t::~exec_node_t()
{}
struct exec_redirection_t : public exec_node_t
{
};
struct exec_argument_t : public exec_node_t
{
};
/**
 * A statement gathered for execution: its decoration plus the arguments and
 * redirections that belong to it.
 */
struct exec_statement_t
{
    /* Which decoration (if any) the statement carries */
    enum
    {
        decoration_plain,
        decoration_command,
        decoration_builtin
    } decoration;

    /* Arguments of the statement */
    std::vector<exec_argument_t> arguments;

    /* Redirections of the statement */
    std::vector<exec_redirection_t> redirections;

    /* Start out undecorated so that `decoration` never holds an indeterminate value */
    exec_statement_t() : decoration(decoration_plain)
    {
    }
};
class parse_exec_t
{
parse_node_tree_t parse_tree;
wcstring src;
std::vector<exec_node_t> exec_nodes;
parse_exec_t(const parse_node_tree_t &tree, const wcstring &s) : parse_tree(tree), src(s)
{
}
void pop_push(uint32_t child_idx)
{
exec_node_t &top = exec_nodes.back();
const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx);
PARSE_ASSERT(child_idx < parse_node.child_count);
node_offset_t child_node_idx = parse_node.child_start + child_idx;
exec_nodes.pop_back();
exec_nodes.push_back(child_node_idx);
}
void simulate(void);
void enter_parse_node(size_t idx);
void run_top_node(void);
};
void parse_exec_t::run_top_node()
{
PARSE_ASSERT(! exec_nodes.empty());
exec_node_t &top = exec_nodes.back();
const parse_node_t &parse_node = parse_tree.at(top.parse_node_idx);
switch (parse_node.type)
{
case symbol_statement_list:
PARSE_ASSERT(parse_node.child_count == 0 || parse_node.child_count == 2);
if (parse_node.child_count == 0)
{
// Statement list done
exec_nodes.pop_back();
}
else
{
// First child is a statement, next is the rest of the list
node_offset_t head = parse_node.child_start;
node_offset_t tail = parse_node.child_start + 1;
exec_nodes.pop_back();
exec_nodes.push_back(tail);
exec_nodes.push_back(head);
}
break;
case symbol_statement:
PARSE_ASSERT(parse_node.child_count == 1);
pop_push(0);
break;
case decorated_statement:
PARSE_ASSERT(parse_node.child_count == 1 || parse_node.child_count == 2 );
pop_push(0);
break;
default:
fprintf(stderr, "Unhandled token type %ld\n", (long)parse_node.type);
PARSER_DIE();
break;
}
}
/* Push a new execution-stack entry for the parse node at index idx.
   idx must name an existing node in the tree. */
void parse_exec_t::enter_parse_node(size_t idx)
{
    PARSE_ASSERT(idx < parse_tree.size());
    exec_nodes.push_back(exec_node_t(idx));
}
/**
 * Walk the parse tree from its root (node 0) until the execution stack is
 * empty.
 *
 * Returns L"(empty!)" for an empty tree, otherwise an empty string for now.
 * The execution stack must be empty on entry.
 *
 * TODO: this function previously called assemble_statement_list(0) and
 * run_node(). Neither name is declared on parse_exec_t, so the calls were
 * replaced by a loop over run_top_node(). Restore them once those helpers exist.
 */
wcstring parse_exec_t::simulate(void)
{
    // Node 0 does not exist in an empty tree, so there is nothing to enter
    if (parse_tree.empty())
        return L"(empty!)";

    PARSE_ASSERT(exec_nodes.empty());
    enter_parse_node(0);

    // Each step pops the top entry and may push children; run until drained
    while (! exec_nodes.empty())
    {
        run_top_node();
    }

    // Always return a value: falling off the end of a non-void function is undefined behavior
    return wcstring();
}
/* NOTE(review): this looks like a stale copy of simulate(). A second definition
   of parse_execution_context_t::simulate, which forwards to ctx->simulate(),
   appears later in this file, and the two cannot coexist. The names
   parse_tree, nodes and node_idx used below are also mixed: presumably they
   were members of an earlier version of the class. TODO confirm and remove
   one of the two definitions. */
wcstring parse_execution_context_t::simulate()
{
// An empty tree yields a fixed placeholder string
if (parse_tree.empty())
return L"(empty!)";
// The current node must exist and must be a statement list
PARSE_ASSERT(node_idx < nodes.size());
PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list);
// NOTE(review): result is declared but never filled in or returned, so
// control reaches the end of a non-void function on this path.
wcstring result;
}
/* Creates the implementation object from the given tree and source string.
   NOTE(review): ctx is allocated with new and no matching delete is visible
   in this file -- confirm who releases it. */
parse_execution_context_t::parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s)
    : ctx(new parse_exec_t(n, s))
{
}
/* Forwards to the implementation object and returns whatever it produces */
wcstring parse_execution_context_t::simulate(void)
{
    parse_exec_t &exec = *ctx;
    return exec.simulate();
}

View file

@ -3,9 +3,20 @@
Programmatic execution of a parse tree
*/
/* Include guard. The macro is specific to this header; a second, differently
   named guard that had no matching #endif has been removed. */
#ifndef FISH_PARSE_EXEC_H
#define FISH_PARSE_EXEC_H

#include "parse_tree.h"

/* Implementation type; only used through a pointer in this header */
class parse_exec_t;

/**
 * Public entry point for executing (currently: simulating) a parse tree.
 * The work is delegated to a parse_exec_t held by pointer.
 */
class parse_execution_context_t
{
    /* Implementation object, created by the constructor */
    parse_exec_t *ctx;

public:
    /* n: the parse tree to execute; s: the source text it was parsed from */
    parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s);

    /* Runs the simulation and returns its textual result */
    wcstring simulate(void);
};

#endif

View file

@ -1,48 +1,10 @@
#include "expression.h"
#include "parse_tree.h"
#include "tokenizer.h"
#include <vector>
// Forward declaration so that the tree typedef below can name the node type.
// NOTE(review): declared with `struct` here, while the definition further down
// uses `class`. This is legal, but some compilers warn about the mismatch.
struct parse_node_t;
// Index of a node within a parse_node_tree_t
typedef size_t node_offset_t;
// A parse tree is stored as a flat vector of nodes
typedef std::vector<parse_node_t> parse_node_tree_t;
// Assertion helpers. Both expand to assert(), so they follow assert()'s
// behavior, including being disabled when NDEBUG is defined.
#define PARSE_ASSERT(a) assert(a)
#define PARSER_DIE() assert(0)
// Forward declaration only; not defined in the visible code
class parse_command_t;
// Types that a parse tree node (or parser token) can have.
// token_type_invalid is the first enumerator and therefore has value 0.
// The order of the enumerators determines their values; FIRST_PARSE_TOKEN_TYPE
// at the end is an alias for the first terminal type.
enum parse_token_type_t
{
token_type_invalid,
// Non-terminal tokens
symbol_statement_list,
symbol_statement,
symbol_block_statement,
symbol_block_header,
symbol_if_header,
symbol_for_header,
symbol_while_header,
symbol_begin_header,
symbol_function_header,
symbol_boolean_statement,
symbol_decorated_statement,
symbol_plain_statement,
symbol_arguments_or_redirections_list,
symbol_argument_or_redirection,
// Terminal types
parse_token_type_string,
parse_token_type_pipe,
parse_token_type_redirection,
parse_token_background,
parse_token_type_end,
parse_token_type_terminate,
// Same value as parse_token_type_string, the first terminal type listed above
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
};
static wcstring token_type_description(parse_token_type_t type)
{
switch (type)
@ -75,25 +37,11 @@ static wcstring token_type_description(parse_token_type_t type)
}
}
// NOTE(review): the lines below appear to interleave two separate definitions:
// the enumerator list of `enum parse_keyword_t` and the body of
// parse_node_t::describe(). As written they do not form valid C++.
// TODO: confirm against the real file and keep only the intended definition.
enum parse_keyword_t
wcstring parse_node_t::describe(void) const
{
parse_keyword_none,
parse_keyword_if,
parse_keyword_else,
parse_keyword_for,
parse_keyword_in,
parse_keyword_while,
parse_keyword_begin,
parse_keyword_function,
parse_keyword_switch,
parse_keyword_end,
parse_keyword_and,
parse_keyword_or,
parse_keyword_not,
parse_keyword_command,
parse_keyword_builtin
};
// The description of a node is the description of its token type
wcstring result = token_type_description(type);
return result;
}
struct parse_token_t
{
@ -135,41 +83,6 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
return result;
}
/** Base class for nodes of a parse tree */
class parse_node_t
{
public:
    /* Start in the source code */
    size_t source_start;

    /* Length of our range in the source code */
    size_t source_length;

    /* Children: index of the first child and number of children */
    node_offset_t child_start;
    node_offset_t child_count;

    /* Type-dependent data */
    uint32_t tag;

    /* Type of the node */
    enum parse_token_type_t type;

    /* Description: currently just the description of the node's type */
    wcstring describe(void) const
    {
        wcstring result = token_type_description(type);
        return result;
    }

    /* Constructor. Creates a node of type ty with every other field zeroed.
       The initializers are listed in declaration order, which is the order
       in which members are actually initialized. */
    explicit parse_node_t(parse_token_type_t ty) :
        source_start(0), source_length(0), child_start(0), child_count(0), tag(0), type(ty)
    {
    }
};
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line)
{
assert(start < nodes.size());
@ -222,30 +135,6 @@ struct parse_stack_element_t
}
};
/**
 * Holds what is needed to execute a parse tree: a copy of the source text,
 * a copy of the tree's nodes and the index of the current node.
 */
class parse_execution_context_t
{
    /* Source text the tree was parsed from */
    wcstring src;

    /* The nodes of the tree; never modified after construction */
    const parse_node_tree_t nodes;

    /* Index of the current node; starts at 0 */
    size_t node_idx;

public:
    /* n: the parse tree; s: its source text. Both are copied. */
    parse_execution_context_t(const parse_node_tree_t &n, const wcstring &s)
        : src(s),
          nodes(n),
          node_idx(0)
    {}

    /* Defined out of line */
    wcstring simulate(void);
};
/**
 * Simulate execution of the tree.
 *
 * Returns L"(empty!)" when there are no nodes. Otherwise the current node
 * must exist and be a statement list; nothing is produced for it yet, so an
 * empty string is returned.
 */
wcstring parse_execution_context_t::simulate()
{
    // The closing parenthesis belongs inside the literal
    if (nodes.empty())
        return L"(empty!)";

    PARSE_ASSERT(node_idx < nodes.size());
    PARSE_ASSERT(nodes.at(node_idx).type == symbol_statement_list);

    // Always return a value: falling off the end of a non-void function is undefined behavior
    return wcstring();
}
class parse_ll_t
{
friend class parse_t;

View file

@ -12,6 +12,10 @@
#include "util.h"
#include "common.h"
#include "tokenizer.h"
#include <vector>
/* Assertion helpers for the parser. Both expand to assert(), so they follow
   assert()'s behavior, including being disabled when NDEBUG is defined.
   PARSER_DIE() marks places that should never be reached. */
#define PARSE_ASSERT(a) assert(a)
#define PARSER_DIE() assert(0)
class parse_ll_t;
@ -25,6 +29,92 @@ class parse_t
void parse(const wcstring &str);
};
/* Forward declaration so that the tree typedef can name the node type */
class parse_node_t;
/* A parse tree is stored as a flat vector of nodes */
typedef std::vector<parse_node_t> parse_node_tree_t;
/* Type used for a node's child_start and child_count fields */
typedef size_t node_offset_t;
/* Types that a parse tree node (or parser token) can have.
   token_type_invalid is the first enumerator and therefore has value 0.
   The order of the enumerators determines their values; FIRST_PARSE_TOKEN_TYPE
   at the end is an alias for the first terminal type. */
enum parse_token_type_t
{
token_type_invalid,
// Non-terminal tokens
symbol_statement_list,
symbol_statement,
symbol_block_statement,
symbol_block_header,
symbol_if_header,
symbol_for_header,
symbol_while_header,
symbol_begin_header,
symbol_function_header,
symbol_boolean_statement,
symbol_decorated_statement,
symbol_plain_statement,
symbol_arguments_or_redirections_list,
symbol_argument_or_redirection,
// Terminal types
parse_token_type_string,
parse_token_type_pipe,
parse_token_type_redirection,
parse_token_background,
parse_token_type_end,
parse_token_type_terminate,
// Same value as parse_token_type_string, the first terminal type listed above
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
};
/* Keywords known to the parser. parse_keyword_none is the first enumerator
   and therefore has value 0; presumably it marks "not a keyword" (confirm
   against the code that assigns it). */
enum parse_keyword_t
{
parse_keyword_none,
parse_keyword_if,
parse_keyword_else,
parse_keyword_for,
parse_keyword_in,
parse_keyword_while,
parse_keyword_begin,
parse_keyword_function,
parse_keyword_switch,
parse_keyword_end,
parse_keyword_and,
parse_keyword_or,
parse_keyword_not,
parse_keyword_command,
parse_keyword_builtin
};
/**
 * A single node of a parse tree. A node records its type, the range of
 * source text it covers and where its children are found.
 */
class parse_node_t
{
public:
    /* What kind of node this is */
    enum parse_token_type_t type;

    /* Offset in the source code at which this node's range begins */
    size_t source_start;

    /* Number of source characters covered by this node */
    size_t source_length;

    /* Index of the first child and number of children */
    node_offset_t child_start;
    node_offset_t child_count;

    /* Extra data whose meaning depends on the node type */
    uint32_t tag;

    /* Human-readable description of the node; defined out of line */
    wcstring describe() const;

    /* Creates a node of type ty; every other field starts at zero */
    explicit parse_node_t(parse_token_type_t ty)
        : type(ty),
          source_start(0),
          source_length(0),
          child_start(0),
          child_count(0),
          tag(0)
    {}
};
/* Fish grammar: