More work on new parser

This commit is contained in:
ridiculousfish 2013-08-11 00:35:00 -07:00
parent 8e07e55c1f
commit e58b73179f
6 changed files with 532 additions and 215 deletions

View file

@ -4075,7 +4075,10 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
} }
else else
{ {
if (0) { const wcstring dump = parse_dump_tree(parse_tree, src);
fprintf(stderr, "%ls", dump.c_str());
if (0)
{
parse_execution_context_t ctx(parse_tree, src); parse_execution_context_t ctx(parse_tree, src);
parse_execution_simulator_t sim; parse_execution_simulator_t sim;
sim.context = &ctx; sim.context = &ctx;

View file

@ -1816,7 +1816,10 @@ static void test_new_parser_correctness(void)
{L"if true ; end", true}, {L"if true ; end", true},
{L"if true; end ; end", false}, {L"if true; end ; end", false},
{L"if end; end ; end", false}, {L"if end; end ; end", false},
{L"end", false} {L"if end", false},
{L"end", false},
{L"for i i", false},
{L"for i in a b c ; end", true}
}; };
for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++) for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++)
@ -1837,7 +1840,87 @@ static void test_new_parser_correctness(void)
} }
} }
say(L"Parse tests complete"); say(L"Parse tests complete");
}
/* One slot of a fuzzed token sequence: a token type together with the keyword attached to it. */
struct parser_fuzz_token_t
{
    parse_token_type_t token_type;
    parse_keyword_t keyword;

    /* A new slot starts at the first terminal type, carrying no keyword */
    parser_fuzz_token_t()
        : token_type(FIRST_TERMINAL_TYPE),
          keyword(parse_keyword_none)
    {}
};
static bool increment(std::vector<parser_fuzz_token_t> &tokens)
{
size_t i, end = tokens.size();
for (i=0; i < end; i++)
{
bool wrapped = false;
struct parser_fuzz_token_t &token = tokens[i];
bool incremented_in_keyword = false;
if (token.token_type == parse_token_type_string)
{
// try incrementing the keyword
token.keyword++;
if (token.keyword <= LAST_KEYWORD)
{
incremented_in_keyword = true;
}
else
{
token.keyword = parse_keyword_none;
incremented_in_keyword = false;
}
}
if (! incremented_in_keyword)
{
token.token_type++;
if (token.token_type > LAST_TERMINAL_TYPE)
{
token.token_type = FIRST_TERMINAL_TYPE;
wrapped = true;
}
}
if (! wrapped)
{
break;
}
}
return i == end;
}
/* Fuzz test for the new parser: for each length from 1 through max, walk the token sequences produced by increment() and feed each sequence, one token at a time, to a freshly constructed parse_t. Return values and outputs are not inspected; the only expectation is that the parser does not crash. Elapsed time is reported at the end. */
static void test_new_parser_fuzzing(void)
{
say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
double start = timef();
// ensure nothing crashes
// Longest token sequence that will be tried
size_t max = 5;
for (size_t len=1; len <= max; len++)
{
// Progress indicator on stderr: current length / maximum length
fprintf(stderr, "%lu / %lu\n", len, max);
// All tokens start default-constructed (FIRST_TERMINAL_TYPE, parse_keyword_none)
std::vector<parser_fuzz_token_t> tokens(len);
do
{
// A new parser per sequence, so no state carries over from the previous permutation
parse_t parser;
parse_node_tree_t parse_tree;
parse_error_list_t errors;
for (size_t i=0; i < len; i++)
{
const parser_fuzz_token_t &token = tokens[i];
// The result is deliberately ignored; a parse failure is an acceptable outcome here
parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors);
}
// keep going until we wrap
// (increment() returns true once every token in the sequence has wrapped around)
}
while (! increment(tokens));
}
double end = timef();
say(L"All fuzzed in %f seconds!", end - start);
} }
__attribute__((unused)) __attribute__((unused))
@ -1863,6 +1946,104 @@ static void test_new_parser(void)
} }
} }
/*
   Test syntax highlighting.

   Each test case is a list of (text, color) components. The components are joined with single
   spaces to build a command line, highlight_shell() is run on it, and the color it reports for
   every character is compared against the expected color. Space characters, both the joining
   spaces and any spaces inside a component, are expected to have color 0.

   Scratch files are created under /tmp/fish_highlight_test so that the HIGHLIGHT_VALID_PATH cases
   refer to paths that exist; the directory is removed again at the end. A failure of any of the
   shell commands, including the final cleanup, is reported through err().
*/
static void test_highlighting(void)
{
    say(L"Testing syntax highlighting");
    if (system("mkdir -p /tmp/fish_highlight_test/")) err(L"mkdir failed");
    if (system("touch /tmp/fish_highlight_test/foo")) err(L"touch failed");
    if (system("touch /tmp/fish_highlight_test/bar")) err(L"touch failed");

    // Here are the components of our source and the colors we expect those to be
    struct highlight_component_t
    {
        const wchar_t *txt;
        int color;
    };

    // Every component list is terminated by an entry whose txt is NULL
    const highlight_component_t components1[] =
    {
        {L"echo", HIGHLIGHT_COMMAND},
        {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
        {L"&", HIGHLIGHT_END},
        {NULL, -1}
    };

    const highlight_component_t components2[] =
    {
        {L"command", HIGHLIGHT_COMMAND},
        {L"echo", HIGHLIGHT_COMMAND},
        {L"abc", HIGHLIGHT_PARAM},
        {L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
        {L"&", HIGHLIGHT_END},
        {NULL, -1}
    };

    const highlight_component_t components3[] =
    {
        {L"if command ls", HIGHLIGHT_COMMAND},
        {L"; ", HIGHLIGHT_END},
        {L"echo", HIGHLIGHT_COMMAND},
        {L"abc", HIGHLIGHT_PARAM},
        {L"; ", HIGHLIGHT_END},
        {L"/bin/definitely_not_a_command", HIGHLIGHT_ERROR},
        {L"; ", HIGHLIGHT_END},
        {L"end", HIGHLIGHT_COMMAND},
        {NULL, -1}
    };

    const highlight_component_t *tests[] = {components1, components2, components3};
    for (size_t which = 0; which < sizeof tests / sizeof *tests; which++)
    {
        const highlight_component_t *components = tests[which];

        // Count how many we have
        size_t component_count = 0;
        while (components[component_count].txt != NULL)
        {
            component_count++;
        }

        // Generate the text, and in parallel the color expected for each of its characters
        wcstring text;
        std::vector<int> expected_colors;
        for (size_t i=0; i < component_count; i++)
        {
            if (i > 0)
            {
                // Components are separated by a single, uncolored space
                text.push_back(L' ');
                expected_colors.push_back(0);
            }
            text.append(components[i].txt);

            // hackish space handling: spaces inside a component are expected to be uncolored too
            const size_t text_len = wcslen(components[i].txt);
            for (size_t j=0; j < text_len; j++)
            {
                bool is_space = (components[i].txt[j] == L' ');
                expected_colors.push_back(is_space ? 0 : components[i].color);
            }
        }
        assert(expected_colors.size() == text.size());

        std::vector<int> colors(text.size());
        highlight_shell(text, colors, 20, NULL, env_vars_snapshot_t());

        // Report a size mismatch before asserting, so the log says what went wrong
        if (expected_colors.size() != colors.size())
        {
            err(L"Color vector has wrong size! Expected %lu, actual %lu", expected_colors.size(), colors.size());
        }
        assert(expected_colors.size() == colors.size());

        for (size_t i=0; i < text.size(); i++)
        {
            if (expected_colors.at(i) != colors.at(i))
            {
                // Pad with i spaces so the caret lands under the offending character
                const wcstring spaces(i, L' ');
                err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str());
            }
        }
    }

    // Clean up the scratch directory, reporting failure just like the setup commands do
    if (system("rm -Rf /tmp/fish_highlight_test")) err(L"rm failed");
}
/** /**
Main test Main test
*/ */
@ -1884,9 +2065,10 @@ int main(int argc, char **argv)
reader_init(); reader_init();
env_init(); env_init();
test_new_parser_correctness(); //test_new_parser_fuzzing();
//test_new_parser_correctness();
//test_highlighting();
//test_new_parser(); //test_new_parser();
return 0;
test_format(); test_format();
test_escape(); test_escape();

View file

@ -1314,7 +1314,8 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars) void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
{ {
ASSERT_IS_BACKGROUND_THREAD(); ASSERT_IS_BACKGROUND_THREAD();
if (1) { if (0)
{
highlight_shell_magic(buff, color, pos, error, vars); highlight_shell_magic(buff, color, pos, error, vars);
return; return;
} }
@ -1810,6 +1811,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color); color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
break; break;
case symbol_if_statement:
{
// Color the 'end'
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
// Color arguments and redirections
const parse_node_t *arguments = parse_tree.get_child(node, 3, symbol_arguments_or_redirections_list);
if (arguments != NULL)
{
color_arguments(buff, parse_tree, *arguments, color);
}
}
break;
case symbol_redirection: case symbol_redirection:
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color); color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color);
break; break;

View file

@ -249,8 +249,10 @@ RESOLVE(argument_list)
{ {
switch (token_type) switch (token_type)
{ {
case parse_token_type_string: return 1; case parse_token_type_string:
default: return 0; return 1;
default:
return 0;
} }
} }

View file

@ -4,6 +4,7 @@
using namespace parse_productions; using namespace parse_productions;
/** Returns a string description of this parse error */
wcstring parse_error_t::describe(const wcstring &src) const wcstring parse_error_t::describe(const wcstring &src) const
{ {
wcstring result = text; wcstring result = text;
@ -41,6 +42,7 @@ wcstring parse_error_t::describe(const wcstring &src) const
return result; return result;
} }
/** Returns a string description of the given token type */
wcstring token_type_description(parse_token_type_t type) wcstring token_type_description(parse_token_type_t type)
{ {
switch (type) switch (type)
@ -172,54 +174,52 @@ wcstring keyword_description(parse_keyword_t k)
} }
} }
/** Returns a string description of the given parse node */
wcstring parse_node_t::describe(void) const wcstring parse_node_t::describe(void) const
{ {
wcstring result = token_type_description(type); wcstring result = token_type_description(type);
return result; return result;
} }
/** A struct representing a single token passed to the parser */
struct parse_token_t struct parse_token_t
{ {
enum parse_token_type_t type; // The type of the token as represented by the parser enum parse_token_type_t type; // The type of the token as represented by the parser
enum token_type tokenizer_type; // The type of the token as represented by the tokenizer
enum parse_keyword_t keyword; // Any keyword represented by this parser enum parse_keyword_t keyword; // Any keyword represented by this parser
size_t source_start; size_t source_start;
size_t source_length; size_t source_length;
wcstring describe() const; wcstring describe() const
}; {
wcstring parse_token_t::describe(void) const
{
wcstring result = token_type_description(type); wcstring result = token_type_description(type);
if (keyword != parse_keyword_none) if (keyword != parse_keyword_none)
{ {
append_format(result, L" <%ls>", keyword_description(keyword).c_str()); append_format(result, L" <%ls>", keyword_description(keyword).c_str());
} }
return result; return result;
} }
};
// Convert from tokenizer_t's token type to our token /* Convert from tokenizer_t's token type to a parse_token_t type */
static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type) static parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type)
{ {
parse_token_t result = {}; parse_token_type_t result = token_type_invalid;
result.tokenizer_type = tokenizer_token_type;
switch (tokenizer_token_type) switch (tokenizer_token_type)
{ {
case TOK_STRING: case TOK_STRING:
result.type = parse_token_type_string; result = parse_token_type_string;
break; break;
case TOK_PIPE: case TOK_PIPE:
result.type = parse_token_type_pipe; result = parse_token_type_pipe;
break; break;
case TOK_END: case TOK_END:
result.type = parse_token_type_end; result = parse_token_type_end;
break; break;
case TOK_BACKGROUND: case TOK_BACKGROUND:
result.type = parse_token_type_background; result = parse_token_type_background;
break; break;
case TOK_REDIRECT_OUT: case TOK_REDIRECT_OUT:
@ -227,15 +227,15 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
case TOK_REDIRECT_IN: case TOK_REDIRECT_IN:
case TOK_REDIRECT_FD: case TOK_REDIRECT_FD:
case TOK_REDIRECT_NOCLOB: case TOK_REDIRECT_NOCLOB:
result.type = parse_token_type_redirection; result = parse_token_type_redirection;
break; break;
case TOK_ERROR: case TOK_ERROR:
result.type = parse_special_type_tokenizer_error; result = parse_special_type_tokenizer_error;
break; break;
case TOK_COMMENT: case TOK_COMMENT:
result.type = parse_special_type_comment; result = parse_special_type_comment;
break; break;
@ -247,6 +247,7 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
return result; return result;
} }
/* Helper function for dump_tree */
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line)
{ {
assert(start < nodes.size()); assert(start < nodes.size());
@ -288,8 +289,8 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
} }
} }
__attribute__((unused)) /* Gives a debugging textual description of a parse tree */
static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src) wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
{ {
if (nodes.empty()) if (nodes.empty())
return L"(empty!)"; return L"(empty!)";
@ -300,6 +301,7 @@ static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
return result; return result;
} }
/* Struct representing elements of the symbol stack, used in the internal state of the LL parser */
struct parse_stack_element_t struct parse_stack_element_t
{ {
enum parse_token_type_t type; enum parse_token_type_t type;
@ -323,31 +325,25 @@ struct parse_stack_element_t
} }
return result; return result;
} }
}; };
/* The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL parser. Most hand-coded LL parsers are recursive descent, but recursive descent parsers are difficult to "pause", unlike table-driven parsers. */
class parse_ll_t class parse_ll_t
{ {
friend class parse_t; /* Traditional symbol stack of the LL parser */
std::vector<parse_stack_element_t> symbol_stack;
std::vector<parse_stack_element_t> symbol_stack; // LL parser stack /* Parser output. This is a parse tree, but stored in an array. */
parse_node_tree_t nodes; parse_node_tree_t nodes;
/* Whether we ran into a fatal error, including parse errors or tokenizer errors */
bool fatal_errored; bool fatal_errored;
/* List of errors we have encountered */
parse_error_list_t errors; parse_error_list_t errors;
// Constructor /* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */
parse_ll_t() : fatal_errored(false) bool top_node_handle_terminal_types(parse_token_t token);
{
this->reset();
}
bool top_node_match_token(parse_token_t token);
void accept_token(parse_token_t token, const wcstring &src);
// Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node.
void reset(void);
void parse_error(const wchar_t *expected, parse_token_t token); void parse_error(const wchar_t *expected, parse_token_t token);
void parse_error(parse_token_t token, const wchar_t *format, ...); void parse_error(parse_token_t token, const wchar_t *format, ...);
@ -355,9 +351,6 @@ class parse_ll_t
void dump_stack(void) const; void dump_stack(void) const;
// Figure out the ranges of intermediate nodes
void determine_node_ranges();
// Get the node corresponding to the top element of the stack // Get the node corresponding to the top element of the stack
parse_node_t &node_for_top_symbol() parse_node_t &node_for_top_symbol()
{ {
@ -420,7 +413,12 @@ class parse_ll_t
for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++)
{ {
production_element_t elem = (*production)[i]; production_element_t elem = (*production)[i];
if (production_element_is_valid(elem)) if (!production_element_is_valid(elem))
{
// All done, bail out
break;
}
else
{ {
// Generate the parse node. Note that this push_back may invalidate node. // Generate the parse node. Note that this push_back may invalidate node.
parse_token_type_t child_type = production_element_type(elem); parse_token_type_t child_type = production_element_type(elem);
@ -452,6 +450,36 @@ class parse_ll_t
} }
} }
public:
/* Constructor */
parse_ll_t() : fatal_errored(false)
{
this->symbol_stack.reserve(16);
this->nodes.reserve(64);
this->reset_symbols_and_nodes();
}
/* Input */
void accept_token(parse_token_t token);
/* Indicate if we hit a fatal error */
bool has_fatal_error(void) const
{
return this->fatal_errored;
}
/* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is reached from the constructor by way of reset_symbols_and_nodes(), and is also called to keep parsing after an error. */
void reset_symbols(void);
/* Clear the parse symbol stack and the node tree. Add a new job_list_t goal node. This is called from the constructor. */
void reset_symbols_and_nodes(void);
/* Once parsing is complete, determine the ranges of intermediate nodes */
void determine_node_ranges();
/* Acquire output after parsing. This transfers directly from within self */
void acquire_output(parse_node_tree_t *output, parse_error_list_t *errors);
}; };
void parse_ll_t::dump_stack(void) const void parse_ll_t::dump_stack(void) const
@ -509,7 +537,8 @@ void parse_ll_t::determine_node_ranges(void)
max_end = std::max(max_end, child.source_start + child.source_length); max_end = std::max(max_end, child.source_start + child.source_length);
} }
if (min_start != source_start_invalid) { if (min_start != source_start_invalid)
{
assert(max_end >= min_start); assert(max_end >= min_start);
parent->source_start = min_start; parent->source_start = min_start;
parent->source_length = max_end - min_start; parent->source_length = max_end - min_start;
@ -517,6 +546,22 @@ void parse_ll_t::determine_node_ranges(void)
} }
} }
/*
   Hand the results of parsing over to the caller.

   output: if non-NULL, receives the node tree.
   errors: if non-NULL, receives the list of errors.

   The contents are exchanged with the caller's containers rather than copied. Whether or not a
   given pointer is NULL, the parser's own node tree, error list and symbol stack are all empty
   afterwards. The fatal error flag is left untouched, so has_fatal_error() may still be queried.

   Member swap is used instead of std::swap: parse_node_tree_t is a class derived from
   std::vector, so std::swap would select the generic template, which copies the whole tree
   when compiled without move semantics.
*/
void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors)
{
    if (output != NULL)
    {
        output->swap(this->nodes);
    }
    // After the swap this holds whatever the caller passed in; discard it
    this->nodes.clear();

    // Note the parameter shadows the member, hence the explicit this->
    if (errors != NULL)
    {
        errors->swap(this->errors);
    }
    this->errors.clear();

    this->symbol_stack.clear();
}
void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...) void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
{ {
//this->dump_stack(); //this->dump_stack();
@ -545,9 +590,9 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
fatal_errored = true; fatal_errored = true;
} }
void parse_ll_t::reset(void) void parse_ll_t::reset_symbols(void)
{ {
// add a new job_list node and then reset our symbol list to point at it /* Add a new job_list node, and then reset our symbol list to point at it */
node_offset_t where = nodes.size(); node_offset_t where = nodes.size();
nodes.push_back(parse_node_t(symbol_job_list)); nodes.push_back(parse_node_t(symbol_job_list));
@ -556,8 +601,31 @@ void parse_ll_t::reset(void)
this->fatal_errored = false; this->fatal_errored = false;
} }
/* Reset both symbols and nodes */
void parse_ll_t::reset_symbols_and_nodes(void)
{
nodes.clear();
this->reset_symbols();
}
bool parse_ll_t::top_node_match_token(parse_token_t token) static bool type_is_terminal_type(parse_token_type_t type)
{
switch (type)
{
case parse_token_type_string:
case parse_token_type_pipe:
case parse_token_type_redirection:
case parse_token_type_background:
case parse_token_type_end:
case parse_token_type_terminate:
return true;
default:
return false;
}
}
bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token)
{ {
if (symbol_stack.empty()) if (symbol_stack.empty())
{ {
@ -568,38 +636,55 @@ bool parse_ll_t::top_node_match_token(parse_token_t token)
PARSE_ASSERT(! symbol_stack.empty()); PARSE_ASSERT(! symbol_stack.empty());
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
bool result = false; bool handled = false;
parse_stack_element_t &stack_top = symbol_stack.back(); parse_stack_element_t &stack_top = symbol_stack.back();
if (type_is_terminal_type(stack_top.type))
{
// The top of the stack is terminal. We are going to handle this (because we can't produce from a terminal type)
handled = true;
// Now see if we actually matched
bool matched = false;
if (stack_top.type == token.type) if (stack_top.type == token.type)
{ {
// So far so good. See if we need a particular keyword. switch (stack_top.type)
if (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword) {
case parse_token_type_string:
// We matched if the keywords match, or no keyword was required
matched = (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword);
break;
default:
// For other types, we only require that the types match
matched = true;
break;
}
}
if (matched)
{ {
// Success. Tell the node that it matched this token // Success. Tell the node that it matched this token
parse_node_t &node = node_for_top_symbol(); parse_node_t &node = node_for_top_symbol();
node.source_start = token.source_start; node.source_start = token.source_start;
node.source_length = token.source_length; node.source_length = token.source_length;
// We consumed this symbol
symbol_stack.pop_back();
result = true;
} }
else if (token.type == parse_token_type_pipe) else
{ {
// Pipes are primitive // Failure
this->fatal_errored = true;
}
// We handled the token, so pop the symbol stack
symbol_stack.pop_back(); symbol_stack.pop_back();
result = true;
} }
} return handled;
return result;
} }
void parse_ll_t::accept_token(parse_token_t token, const wcstring &src) void parse_ll_t::accept_token(parse_token_t token)
{ {
bool logit = false; bool logit = false;
if (logit) if (logit)
{ {
const wcstring txt = wcstring(src, token.source_start, token.source_length);
fprintf(stderr, "Accept token %ls\n", token.describe().c_str()); fprintf(stderr, "Accept token %ls\n", token.describe().c_str());
} }
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE); PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
@ -620,7 +705,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
{ {
PARSE_ASSERT(! symbol_stack.empty()); PARSE_ASSERT(! symbol_stack.empty());
if (top_node_match_token(token)) if (top_node_handle_terminal_types(token))
{ {
if (logit) if (logit)
{ {
@ -715,7 +800,7 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
tok_options |= TOK_SHOW_COMMENTS; tok_options |= TOK_SHOW_COMMENTS;
tokenizer_t tok = tokenizer_t(str.c_str(), tok_options); tokenizer_t tok = tokenizer_t(str.c_str(), tok_options);
for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok)) for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok))
{ {
token_type tok_type = static_cast<token_type>(tok_last_type(&tok)); token_type tok_type = static_cast<token_type>(tok_last_type(&tok));
const wchar_t *tok_txt = tok_last(&tok); const wchar_t *tok_txt = tok_last(&tok);
@ -723,22 +808,22 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
size_t tok_extent = tok_get_extent(&tok); size_t tok_extent = tok_get_extent(&tok);
assert(tok_extent < 10000000); //paranoia assert(tok_extent < 10000000); //paranoia
parse_token_t token = parse_token_from_tokenizer_token(tok_type); parse_token_t token;
token.tokenizer_type = tok_type; token.type = parse_token_type_from_tokenizer_token(tok_type);
token.source_start = (size_t)tok_start; token.source_start = (size_t)tok_start;
token.source_length = tok_extent; token.source_length = tok_extent;
token.keyword = keyword_for_token(tok_type, tok_txt); token.keyword = keyword_for_token(tok_type, tok_txt);
this->parser->accept_token(token, str); this->parser->accept_token(token);
if (this->parser->fatal_errored) if (this->parser->has_fatal_error())
{ {
if (parse_flags & parse_flag_continue_after_error) if (parse_flags & parse_flag_continue_after_error)
{ {
/* Mark an error and then keep going */ /* Mark an error and then keep going */
token.type = parse_special_type_parse_error; token.type = parse_special_type_parse_error;
token.keyword = parse_keyword_none; token.keyword = parse_keyword_none;
this->parser->accept_token(token, str); this->parser->accept_token(token);
this->parser->reset(); this->parser->reset_symbols();
} }
else else
{ {
@ -757,19 +842,32 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t)); fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));
#endif #endif
if (output != NULL) // Acquire the output from the parser
{ this->parser->acquire_output(output, errors);
output->swap(this->parser->nodes);
this->parser->nodes.clear();
}
if (errors != NULL) // Indicate if we had a fatal error
{ return ! this->parser->has_fatal_error();
errors->swap(this->parser->errors); }
this->parser->errors.clear();
}
/* Feed a single, source-less token to the parser. Returns true if the parser has not hit a fatal error after accepting it. */
/* NOTE(review): the output and errors parameters are not used in this body; nothing is written to them here. */
return ! this->parser->fatal_errored; bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors)
{
// Only strings can have keywords. So if we have a keyword, the type must be a string
assert(keyword == parse_keyword_none || token_type == parse_token_type_string);
// Build a token that carries only a type and a keyword
parse_token_t token;
token.type = token_type;
token.keyword = keyword;
// There is no source text behind this token. source_start is a size_t, so -1 becomes the largest size_t value.
// NOTE(review): presumably this is meant to equal source_start_invalid - confirm, and prefer the named constant.
token.source_start = -1;
token.source_length = 0;
this->parser->accept_token(token);
return ! this->parser->has_fatal_error();
}
void parse_t::clear()
{
this->parser->reset_symbols_and_nodes();
} }
const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const

View file

@ -36,29 +36,6 @@ struct parse_error_t
}; };
typedef std::vector<parse_error_t> parse_error_list_t; typedef std::vector<parse_error_t> parse_error_list_t;
enum
{
parse_flag_none = 0,
/* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
parse_flag_continue_after_error = 1 << 0,
/* Include comment tokens */
parse_flag_include_comments = 1 << 1
};
typedef unsigned int parse_tree_flags_t;
class parse_ll_t;
class parse_t
{
parse_ll_t * const parser;
public:
parse_t();
~parse_t();
bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
};
enum parse_token_type_t enum parse_token_type_t
{ {
token_type_invalid, token_type_invalid,
@ -111,6 +88,9 @@ enum parse_token_type_t
parse_special_type_tokenizer_error, parse_special_type_tokenizer_error,
parse_special_type_comment, parse_special_type_comment,
FIRST_TERMINAL_TYPE = parse_token_type_string,
LAST_TERMINAL_TYPE = parse_token_type_terminate,
LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
}; };
@ -132,9 +112,46 @@ enum parse_keyword_t
parse_keyword_or, parse_keyword_or,
parse_keyword_not, parse_keyword_not,
parse_keyword_command, parse_keyword_command,
parse_keyword_builtin parse_keyword_builtin,
LAST_KEYWORD = parse_keyword_builtin
}; };
enum
{
parse_flag_none = 0,
/* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
parse_flag_continue_after_error = 1 << 0,
/* Include comment tokens */
parse_flag_include_comments = 1 << 1
};
typedef unsigned int parse_tree_flags_t;
class parse_ll_t;
/* Public interface to the parser. The parsing work itself is done by a parse_ll_t, which is only forward-declared in this header. */
class parse_t
{
// The parser implementation; the pointer itself cannot be reseated after construction.
// NOTE(review): presumably allocated by the constructor and freed by the destructor - confirm.
parse_ll_t * const parser;
public:
parse_t();
~parse_t();
/* Parse a string. flags is a combination of the parse_flag_* values. output and errors, if non-NULL, receive the node tree and the error list. Returns true if no fatal error occurred. */
bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
/* Parse a single token, given as a token type and keyword. A keyword may only accompany a string token. Returns true if the parser has not hit a fatal error. */
bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
/* Reset, ready to parse something else */
void clear();
};
wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
wcstring token_type_description(parse_token_type_t type); wcstring token_type_description(parse_token_type_t type);
wcstring keyword_description(parse_keyword_t type); wcstring keyword_description(parse_keyword_t type);
@ -184,7 +201,7 @@ public:
class parse_node_tree_t : public std::vector<parse_node_t> class parse_node_tree_t : public std::vector<parse_node_t>
{ {
public: public:
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */ /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const; const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
@ -200,8 +217,8 @@ class parse_node_tree_t : public std::vector<parse_node_t>
# A job_list is a list of jobs, separated by semicolons or newlines # A job_list is a list of jobs, separated by semicolons or newlines
job_list = <empty> | job_list = <empty> |
<TOK_END> job_list |
job job_list job job_list
<TOK_END> job_list
# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation # A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation