mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-27 05:13:10 +00:00
More work on new parser
This commit is contained in:
parent
8e07e55c1f
commit
e58b73179f
6 changed files with 532 additions and 215 deletions
|
@ -4075,7 +4075,10 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (0) {
|
const wcstring dump = parse_dump_tree(parse_tree, src);
|
||||||
|
fprintf(stderr, "%ls", dump.c_str());
|
||||||
|
if (0)
|
||||||
|
{
|
||||||
parse_execution_context_t ctx(parse_tree, src);
|
parse_execution_context_t ctx(parse_tree, src);
|
||||||
parse_execution_simulator_t sim;
|
parse_execution_simulator_t sim;
|
||||||
sim.context = &ctx;
|
sim.context = &ctx;
|
||||||
|
|
188
fish_tests.cpp
188
fish_tests.cpp
|
@ -1816,7 +1816,10 @@ static void test_new_parser_correctness(void)
|
||||||
{L"if true ; end", true},
|
{L"if true ; end", true},
|
||||||
{L"if true; end ; end", false},
|
{L"if true; end ; end", false},
|
||||||
{L"if end; end ; end", false},
|
{L"if end; end ; end", false},
|
||||||
{L"end", false}
|
{L"if end", false},
|
||||||
|
{L"end", false},
|
||||||
|
{L"for i i", false},
|
||||||
|
{L"for i in a b c ; end", true}
|
||||||
};
|
};
|
||||||
|
|
||||||
for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++)
|
for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++)
|
||||||
|
@ -1837,7 +1840,87 @@ static void test_new_parser_correctness(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
say(L"Parse tests complete");
|
say(L"Parse tests complete");
|
||||||
|
}
|
||||||
|
|
||||||
|
struct parser_fuzz_token_t
|
||||||
|
{
|
||||||
|
parse_token_type_t token_type;
|
||||||
|
parse_keyword_t keyword;
|
||||||
|
|
||||||
|
parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool increment(std::vector<parser_fuzz_token_t> &tokens)
|
||||||
|
{
|
||||||
|
size_t i, end = tokens.size();
|
||||||
|
for (i=0; i < end; i++)
|
||||||
|
{
|
||||||
|
bool wrapped = false;
|
||||||
|
|
||||||
|
struct parser_fuzz_token_t &token = tokens[i];
|
||||||
|
bool incremented_in_keyword = false;
|
||||||
|
if (token.token_type == parse_token_type_string)
|
||||||
|
{
|
||||||
|
// try incrementing the keyword
|
||||||
|
token.keyword++;
|
||||||
|
if (token.keyword <= LAST_KEYWORD)
|
||||||
|
{
|
||||||
|
incremented_in_keyword = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
token.keyword = parse_keyword_none;
|
||||||
|
incremented_in_keyword = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (! incremented_in_keyword)
|
||||||
|
{
|
||||||
|
token.token_type++;
|
||||||
|
if (token.token_type > LAST_TERMINAL_TYPE)
|
||||||
|
{
|
||||||
|
token.token_type = FIRST_TERMINAL_TYPE;
|
||||||
|
wrapped = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (! wrapped)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i == end;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_new_parser_fuzzing(void)
|
||||||
|
{
|
||||||
|
say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
|
||||||
|
double start = timef();
|
||||||
|
// ensure nothing crashes
|
||||||
|
size_t max = 5;
|
||||||
|
for (size_t len=1; len <= max; len++)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "%lu / %lu\n", len, max);
|
||||||
|
std::vector<parser_fuzz_token_t> tokens(len);
|
||||||
|
do
|
||||||
|
{
|
||||||
|
parse_t parser;
|
||||||
|
parse_node_tree_t parse_tree;
|
||||||
|
parse_error_list_t errors;
|
||||||
|
for (size_t i=0; i < len; i++)
|
||||||
|
{
|
||||||
|
const parser_fuzz_token_t &token = tokens[i];
|
||||||
|
parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors);
|
||||||
|
}
|
||||||
|
|
||||||
|
// keep going until we wrap
|
||||||
|
}
|
||||||
|
while (! increment(tokens));
|
||||||
|
}
|
||||||
|
double end = timef();
|
||||||
|
say(L"All fuzzed in %f seconds!", end - start);
|
||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((unused))
|
__attribute__((unused))
|
||||||
|
@ -1863,6 +1946,104 @@ static void test_new_parser(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_highlighting(void)
|
||||||
|
{
|
||||||
|
say(L"Testing syntax highlighting");
|
||||||
|
if (system("mkdir -p /tmp/fish_highlight_test/")) err(L"mkdir failed");
|
||||||
|
if (system("touch /tmp/fish_highlight_test/foo")) err(L"touch failed");
|
||||||
|
if (system("touch /tmp/fish_highlight_test/bar")) err(L"touch failed");
|
||||||
|
|
||||||
|
// Here are the components of our source and the colors we expect those to be
|
||||||
|
struct highlight_component_t {
|
||||||
|
const wchar_t *txt;
|
||||||
|
int color;
|
||||||
|
};
|
||||||
|
|
||||||
|
const highlight_component_t components1[] =
|
||||||
|
{
|
||||||
|
{L"echo", HIGHLIGHT_COMMAND},
|
||||||
|
{L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
|
||||||
|
{L"&", HIGHLIGHT_END},
|
||||||
|
{NULL, -1}
|
||||||
|
};
|
||||||
|
|
||||||
|
const highlight_component_t components2[] =
|
||||||
|
{
|
||||||
|
{L"command", HIGHLIGHT_COMMAND},
|
||||||
|
{L"echo", HIGHLIGHT_COMMAND},
|
||||||
|
{L"abc", HIGHLIGHT_PARAM},
|
||||||
|
{L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
|
||||||
|
{L"&", HIGHLIGHT_END},
|
||||||
|
{NULL, -1}
|
||||||
|
};
|
||||||
|
|
||||||
|
const highlight_component_t components3[] =
|
||||||
|
{
|
||||||
|
{L"if command ls", HIGHLIGHT_COMMAND},
|
||||||
|
{L"; ", HIGHLIGHT_END},
|
||||||
|
{L"echo", HIGHLIGHT_COMMAND},
|
||||||
|
{L"abc", HIGHLIGHT_PARAM},
|
||||||
|
{L"; ", HIGHLIGHT_END},
|
||||||
|
{L"/bin/definitely_not_a_command", HIGHLIGHT_ERROR},
|
||||||
|
{L"; ", HIGHLIGHT_END},
|
||||||
|
{L"end", HIGHLIGHT_COMMAND},
|
||||||
|
{NULL, -1}
|
||||||
|
};
|
||||||
|
|
||||||
|
const highlight_component_t *tests[] = {components1, components2, components3};
|
||||||
|
for (size_t which = 0; which < sizeof tests / sizeof *tests; which++)
|
||||||
|
{
|
||||||
|
const highlight_component_t *components = tests[which];
|
||||||
|
// Count how many we have
|
||||||
|
size_t component_count = 0;
|
||||||
|
while (components[component_count].txt != NULL)
|
||||||
|
{
|
||||||
|
component_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate the text
|
||||||
|
wcstring text;
|
||||||
|
std::vector<int> expected_colors;
|
||||||
|
for (size_t i=0; i < component_count; i++)
|
||||||
|
{
|
||||||
|
if (i > 0)
|
||||||
|
{
|
||||||
|
text.push_back(L' ');
|
||||||
|
expected_colors.push_back(0);
|
||||||
|
}
|
||||||
|
text.append(components[i].txt);
|
||||||
|
|
||||||
|
// hackish space handling
|
||||||
|
const size_t text_len = wcslen(components[i].txt);
|
||||||
|
for (size_t j=0; j < text_len; j++)
|
||||||
|
{
|
||||||
|
bool is_space = (components[i].txt[j] == L' ');
|
||||||
|
expected_colors.push_back(is_space ? 0 : components[i].color);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(expected_colors.size() == text.size());
|
||||||
|
|
||||||
|
std::vector<int> colors(text.size());
|
||||||
|
highlight_shell(text, colors, 20, NULL, env_vars_snapshot_t());
|
||||||
|
|
||||||
|
if (expected_colors.size() != colors.size())
|
||||||
|
{
|
||||||
|
err(L"Color vector has wrong size! Expected %lu, actual %lu", expected_colors.size(), colors.size());
|
||||||
|
}
|
||||||
|
assert(expected_colors.size() == colors.size());
|
||||||
|
for (size_t i=0; i < text.size(); i++)
|
||||||
|
{
|
||||||
|
if (expected_colors.at(i) != colors.at(i))
|
||||||
|
{
|
||||||
|
const wcstring spaces(i, L' ');
|
||||||
|
err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
system("rm -Rf /tmp/fish_highlight_test");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Main test
|
Main test
|
||||||
*/
|
*/
|
||||||
|
@ -1884,9 +2065,10 @@ int main(int argc, char **argv)
|
||||||
reader_init();
|
reader_init();
|
||||||
env_init();
|
env_init();
|
||||||
|
|
||||||
test_new_parser_correctness();
|
//test_new_parser_fuzzing();
|
||||||
|
//test_new_parser_correctness();
|
||||||
|
//test_highlighting();
|
||||||
//test_new_parser();
|
//test_new_parser();
|
||||||
return 0;
|
|
||||||
|
|
||||||
test_format();
|
test_format();
|
||||||
test_escape();
|
test_escape();
|
||||||
|
|
|
@ -1314,7 +1314,8 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
|
||||||
void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
|
void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
|
||||||
{
|
{
|
||||||
ASSERT_IS_BACKGROUND_THREAD();
|
ASSERT_IS_BACKGROUND_THREAD();
|
||||||
if (1) {
|
if (0)
|
||||||
|
{
|
||||||
highlight_shell_magic(buff, color, pos, error, vars);
|
highlight_shell_magic(buff, color, pos, error, vars);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1810,6 +1811,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
|
||||||
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
|
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case symbol_if_statement:
|
||||||
|
{
|
||||||
|
// Color the 'end'
|
||||||
|
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
|
||||||
|
|
||||||
|
// Color arguments and redirections
|
||||||
|
const parse_node_t *arguments = parse_tree.get_child(node, 3, symbol_arguments_or_redirections_list);
|
||||||
|
if (arguments != NULL)
|
||||||
|
{
|
||||||
|
color_arguments(buff, parse_tree, *arguments, color);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case symbol_redirection:
|
case symbol_redirection:
|
||||||
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color);
|
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -249,8 +249,10 @@ RESOLVE(argument_list)
|
||||||
{
|
{
|
||||||
switch (token_type)
|
switch (token_type)
|
||||||
{
|
{
|
||||||
case parse_token_type_string: return 1;
|
case parse_token_type_string:
|
||||||
default: return 0;
|
return 1;
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
250
parse_tree.cpp
250
parse_tree.cpp
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
using namespace parse_productions;
|
using namespace parse_productions;
|
||||||
|
|
||||||
|
/** Returns a string description of this parse error */
|
||||||
wcstring parse_error_t::describe(const wcstring &src) const
|
wcstring parse_error_t::describe(const wcstring &src) const
|
||||||
{
|
{
|
||||||
wcstring result = text;
|
wcstring result = text;
|
||||||
|
@ -41,6 +42,7 @@ wcstring parse_error_t::describe(const wcstring &src) const
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns a string description of the given token type */
|
||||||
wcstring token_type_description(parse_token_type_t type)
|
wcstring token_type_description(parse_token_type_t type)
|
||||||
{
|
{
|
||||||
switch (type)
|
switch (type)
|
||||||
|
@ -172,54 +174,52 @@ wcstring keyword_description(parse_keyword_t k)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns a string description of the given parse node */
|
||||||
wcstring parse_node_t::describe(void) const
|
wcstring parse_node_t::describe(void) const
|
||||||
{
|
{
|
||||||
wcstring result = token_type_description(type);
|
wcstring result = token_type_description(type);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** A struct representing a token passed to the LL parser (see parse_ll_t::accept_token) */
|
||||||
struct parse_token_t
|
struct parse_token_t
|
||||||
{
|
{
|
||||||
enum parse_token_type_t type; // The type of the token as represented by the parser
|
enum parse_token_type_t type; // The type of the token as represented by the parser
|
||||||
enum token_type tokenizer_type; // The type of the token as represented by the tokenizer
|
|
||||||
enum parse_keyword_t keyword; // Any keyword represented by this token
|
enum parse_keyword_t keyword; // Any keyword represented by this token
|
||||||
size_t source_start;
|
size_t source_start;
|
||||||
size_t source_length;
|
size_t source_length;
|
||||||
|
|
||||||
wcstring describe() const;
|
wcstring describe() const
|
||||||
};
|
{
|
||||||
|
|
||||||
wcstring parse_token_t::describe(void) const
|
|
||||||
{
|
|
||||||
wcstring result = token_type_description(type);
|
wcstring result = token_type_description(type);
|
||||||
if (keyword != parse_keyword_none)
|
if (keyword != parse_keyword_none)
|
||||||
{
|
{
|
||||||
append_format(result, L" <%ls>", keyword_description(keyword).c_str());
|
append_format(result, L" <%ls>", keyword_description(keyword).c_str());
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Convert from tokenizer_t's token type to our token
|
/* Convert from tokenizer_t's token type to the corresponding parse_token_type_t */
|
||||||
static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type)
|
static parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type)
|
||||||
{
|
{
|
||||||
parse_token_t result = {};
|
parse_token_type_t result = token_type_invalid;
|
||||||
result.tokenizer_type = tokenizer_token_type;
|
|
||||||
switch (tokenizer_token_type)
|
switch (tokenizer_token_type)
|
||||||
{
|
{
|
||||||
case TOK_STRING:
|
case TOK_STRING:
|
||||||
result.type = parse_token_type_string;
|
result = parse_token_type_string;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TOK_PIPE:
|
case TOK_PIPE:
|
||||||
result.type = parse_token_type_pipe;
|
result = parse_token_type_pipe;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TOK_END:
|
case TOK_END:
|
||||||
result.type = parse_token_type_end;
|
result = parse_token_type_end;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TOK_BACKGROUND:
|
case TOK_BACKGROUND:
|
||||||
result.type = parse_token_type_background;
|
result = parse_token_type_background;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TOK_REDIRECT_OUT:
|
case TOK_REDIRECT_OUT:
|
||||||
|
@ -227,15 +227,15 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
|
||||||
case TOK_REDIRECT_IN:
|
case TOK_REDIRECT_IN:
|
||||||
case TOK_REDIRECT_FD:
|
case TOK_REDIRECT_FD:
|
||||||
case TOK_REDIRECT_NOCLOB:
|
case TOK_REDIRECT_NOCLOB:
|
||||||
result.type = parse_token_type_redirection;
|
result = parse_token_type_redirection;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TOK_ERROR:
|
case TOK_ERROR:
|
||||||
result.type = parse_special_type_tokenizer_error;
|
result = parse_special_type_tokenizer_error;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TOK_COMMENT:
|
case TOK_COMMENT:
|
||||||
result.type = parse_special_type_comment;
|
result = parse_special_type_comment;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
|
@ -247,6 +247,7 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Helper function for dump_tree */
|
||||||
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line)
|
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line)
|
||||||
{
|
{
|
||||||
assert(start < nodes.size());
|
assert(start < nodes.size());
|
||||||
|
@ -288,8 +289,8 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__attribute__((unused))
|
/* Gives a debugging textual description of a parse tree */
|
||||||
static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
|
wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
|
||||||
{
|
{
|
||||||
if (nodes.empty())
|
if (nodes.empty())
|
||||||
return L"(empty!)";
|
return L"(empty!)";
|
||||||
|
@ -300,6 +301,7 @@ static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Struct representing elements of the symbol stack, used in the internal state of the LL parser */
|
||||||
struct parse_stack_element_t
|
struct parse_stack_element_t
|
||||||
{
|
{
|
||||||
enum parse_token_type_t type;
|
enum parse_token_type_t type;
|
||||||
|
@ -323,31 +325,25 @@ struct parse_stack_element_t
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL parser. Most hand-coded LL parsers are recursive descent, but recursive descent parsers are difficult to "pause", unlike table-driven parsers. */
|
||||||
class parse_ll_t
|
class parse_ll_t
|
||||||
{
|
{
|
||||||
friend class parse_t;
|
/* Traditional symbol stack of the LL parser */
|
||||||
|
std::vector<parse_stack_element_t> symbol_stack;
|
||||||
|
|
||||||
std::vector<parse_stack_element_t> symbol_stack; // LL parser stack
|
/* Parser output. This is a parse tree, but stored in an array. */
|
||||||
parse_node_tree_t nodes;
|
parse_node_tree_t nodes;
|
||||||
|
|
||||||
|
/* Whether we ran into a fatal error, including parse errors or tokenizer errors */
|
||||||
bool fatal_errored;
|
bool fatal_errored;
|
||||||
|
|
||||||
|
/* List of errors we have encountered */
|
||||||
parse_error_list_t errors;
|
parse_error_list_t errors;
|
||||||
|
|
||||||
// Constructor
|
/* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */
|
||||||
parse_ll_t() : fatal_errored(false)
|
bool top_node_handle_terminal_types(parse_token_t token);
|
||||||
{
|
|
||||||
this->reset();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool top_node_match_token(parse_token_t token);
|
|
||||||
|
|
||||||
void accept_token(parse_token_t token, const wcstring &src);
|
|
||||||
|
|
||||||
// Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node.
|
|
||||||
void reset(void);
|
|
||||||
|
|
||||||
void parse_error(const wchar_t *expected, parse_token_t token);
|
void parse_error(const wchar_t *expected, parse_token_t token);
|
||||||
void parse_error(parse_token_t token, const wchar_t *format, ...);
|
void parse_error(parse_token_t token, const wchar_t *format, ...);
|
||||||
|
@ -355,9 +351,6 @@ class parse_ll_t
|
||||||
|
|
||||||
void dump_stack(void) const;
|
void dump_stack(void) const;
|
||||||
|
|
||||||
// Figure out the ranges of intermediate nodes
|
|
||||||
void determine_node_ranges();
|
|
||||||
|
|
||||||
// Get the node corresponding to the top element of the stack
|
// Get the node corresponding to the top element of the stack
|
||||||
parse_node_t &node_for_top_symbol()
|
parse_node_t &node_for_top_symbol()
|
||||||
{
|
{
|
||||||
|
@ -420,7 +413,12 @@ class parse_ll_t
|
||||||
for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++)
|
for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++)
|
||||||
{
|
{
|
||||||
production_element_t elem = (*production)[i];
|
production_element_t elem = (*production)[i];
|
||||||
if (production_element_is_valid(elem))
|
if (!production_element_is_valid(elem))
|
||||||
|
{
|
||||||
|
// All done, bail out
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
// Generate the parse node. Note that this push_back may invalidate node.
|
// Generate the parse node. Note that this push_back may invalidate node.
|
||||||
parse_token_type_t child_type = production_element_type(elem);
|
parse_token_type_t child_type = production_element_type(elem);
|
||||||
|
@ -452,6 +450,36 @@ class parse_ll_t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
/* Constructor */
|
||||||
|
parse_ll_t() : fatal_errored(false)
|
||||||
|
{
|
||||||
|
this->symbol_stack.reserve(16);
|
||||||
|
this->nodes.reserve(64);
|
||||||
|
this->reset_symbols_and_nodes();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Input */
|
||||||
|
void accept_token(parse_token_t token);
|
||||||
|
|
||||||
|
/* Indicate if we hit a fatal error */
|
||||||
|
bool has_fatal_error(void) const
|
||||||
|
{
|
||||||
|
return this->fatal_errored;
|
||||||
|
}
|
||||||
|
|
||||||
|
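/* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called by reset_symbols_and_nodes() (and so indirectly by the constructor), and by parse_t::parse() when continuing after an error */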
|
||||||
|
void reset_symbols(void);
|
||||||
|
|
||||||
|
/* Clear the parse symbol stack and the node tree. Add a new job_list_t goal node. This is called from the constructor. */
|
||||||
|
void reset_symbols_and_nodes(void);
|
||||||
|
|
||||||
|
/* Once parsing is complete, determine the ranges of intermediate nodes */
|
||||||
|
void determine_node_ranges();
|
||||||
|
|
||||||
|
/* Acquire output after parsing. This transfers directly from within self */
|
||||||
|
void acquire_output(parse_node_tree_t *output, parse_error_list_t *errors);
|
||||||
};
|
};
|
||||||
|
|
||||||
void parse_ll_t::dump_stack(void) const
|
void parse_ll_t::dump_stack(void) const
|
||||||
|
@ -509,7 +537,8 @@ void parse_ll_t::determine_node_ranges(void)
|
||||||
max_end = std::max(max_end, child.source_start + child.source_length);
|
max_end = std::max(max_end, child.source_start + child.source_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min_start != source_start_invalid) {
|
if (min_start != source_start_invalid)
|
||||||
|
{
|
||||||
assert(max_end >= min_start);
|
assert(max_end >= min_start);
|
||||||
parent->source_start = min_start;
|
parent->source_start = min_start;
|
||||||
parent->source_length = max_end - min_start;
|
parent->source_length = max_end - min_start;
|
||||||
|
@ -517,6 +546,22 @@ void parse_ll_t::determine_node_ranges(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors)
|
||||||
|
{
|
||||||
|
if (output != NULL)
|
||||||
|
{
|
||||||
|
std::swap(*output, this->nodes);
|
||||||
|
}
|
||||||
|
this->nodes.clear();
|
||||||
|
|
||||||
|
if (errors != NULL)
|
||||||
|
{
|
||||||
|
std::swap(*errors, this->errors);
|
||||||
|
}
|
||||||
|
this->errors.clear();
|
||||||
|
this->symbol_stack.clear();
|
||||||
|
}
|
||||||
|
|
||||||
void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
|
void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
|
||||||
{
|
{
|
||||||
//this->dump_stack();
|
//this->dump_stack();
|
||||||
|
@ -545,9 +590,9 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
|
||||||
fatal_errored = true;
|
fatal_errored = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_ll_t::reset(void)
|
void parse_ll_t::reset_symbols(void)
|
||||||
{
|
{
|
||||||
// add a new job_list node and then reset our symbol list to point at it
|
/* Add a new job_list node, and then reset our symbol list to point at it */
|
||||||
node_offset_t where = nodes.size();
|
node_offset_t where = nodes.size();
|
||||||
nodes.push_back(parse_node_t(symbol_job_list));
|
nodes.push_back(parse_node_t(symbol_job_list));
|
||||||
|
|
||||||
|
@ -556,8 +601,31 @@ void parse_ll_t::reset(void)
|
||||||
this->fatal_errored = false;
|
this->fatal_errored = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Reset both symbols and nodes */
|
||||||
|
void parse_ll_t::reset_symbols_and_nodes(void)
|
||||||
|
{
|
||||||
|
nodes.clear();
|
||||||
|
this->reset_symbols();
|
||||||
|
}
|
||||||
|
|
||||||
bool parse_ll_t::top_node_match_token(parse_token_t token)
|
static bool type_is_terminal_type(parse_token_type_t type)
|
||||||
|
{
|
||||||
|
switch (type)
|
||||||
|
{
|
||||||
|
case parse_token_type_string:
|
||||||
|
case parse_token_type_pipe:
|
||||||
|
case parse_token_type_redirection:
|
||||||
|
case parse_token_type_background:
|
||||||
|
case parse_token_type_end:
|
||||||
|
case parse_token_type_terminate:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token)
|
||||||
{
|
{
|
||||||
if (symbol_stack.empty())
|
if (symbol_stack.empty())
|
||||||
{
|
{
|
||||||
|
@ -568,38 +636,55 @@ bool parse_ll_t::top_node_match_token(parse_token_t token)
|
||||||
|
|
||||||
PARSE_ASSERT(! symbol_stack.empty());
|
PARSE_ASSERT(! symbol_stack.empty());
|
||||||
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
|
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
|
||||||
bool result = false;
|
bool handled = false;
|
||||||
parse_stack_element_t &stack_top = symbol_stack.back();
|
parse_stack_element_t &stack_top = symbol_stack.back();
|
||||||
|
if (type_is_terminal_type(stack_top.type))
|
||||||
|
{
|
||||||
|
// The top of the stack is terminal. We are going to handle this (because we can't produce from a terminal type)
|
||||||
|
handled = true;
|
||||||
|
|
||||||
|
// Now see if we actually matched
|
||||||
|
bool matched = false;
|
||||||
if (stack_top.type == token.type)
|
if (stack_top.type == token.type)
|
||||||
{
|
{
|
||||||
// So far so good. See if we need a particular keyword.
|
switch (stack_top.type)
|
||||||
if (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword)
|
{
|
||||||
|
case parse_token_type_string:
|
||||||
|
// We matched if the keywords match, or no keyword was required
|
||||||
|
matched = (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
// For other types, we only require that the types match
|
||||||
|
matched = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (matched)
|
||||||
{
|
{
|
||||||
// Success. Tell the node that it matched this token
|
// Success. Tell the node that it matched this token
|
||||||
parse_node_t &node = node_for_top_symbol();
|
parse_node_t &node = node_for_top_symbol();
|
||||||
node.source_start = token.source_start;
|
node.source_start = token.source_start;
|
||||||
node.source_length = token.source_length;
|
node.source_length = token.source_length;
|
||||||
|
|
||||||
// We consumed this symbol
|
|
||||||
symbol_stack.pop_back();
|
|
||||||
result = true;
|
|
||||||
}
|
}
|
||||||
else if (token.type == parse_token_type_pipe)
|
else
|
||||||
{
|
{
|
||||||
// Pipes are primitive
|
// Failure
|
||||||
|
this->fatal_errored = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We handled the token, so pop the symbol stack
|
||||||
symbol_stack.pop_back();
|
symbol_stack.pop_back();
|
||||||
result = true;
|
|
||||||
}
|
}
|
||||||
}
|
return handled;
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
|
void parse_ll_t::accept_token(parse_token_t token)
|
||||||
{
|
{
|
||||||
bool logit = false;
|
bool logit = false;
|
||||||
if (logit)
|
if (logit)
|
||||||
{
|
{
|
||||||
const wcstring txt = wcstring(src, token.source_start, token.source_length);
|
|
||||||
fprintf(stderr, "Accept token %ls\n", token.describe().c_str());
|
fprintf(stderr, "Accept token %ls\n", token.describe().c_str());
|
||||||
}
|
}
|
||||||
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
|
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
|
||||||
|
@ -620,7 +705,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
|
||||||
{
|
{
|
||||||
PARSE_ASSERT(! symbol_stack.empty());
|
PARSE_ASSERT(! symbol_stack.empty());
|
||||||
|
|
||||||
if (top_node_match_token(token))
|
if (top_node_handle_terminal_types(token))
|
||||||
{
|
{
|
||||||
if (logit)
|
if (logit)
|
||||||
{
|
{
|
||||||
|
@ -715,7 +800,7 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
|
||||||
tok_options |= TOK_SHOW_COMMENTS;
|
tok_options |= TOK_SHOW_COMMENTS;
|
||||||
|
|
||||||
tokenizer_t tok = tokenizer_t(str.c_str(), tok_options);
|
tokenizer_t tok = tokenizer_t(str.c_str(), tok_options);
|
||||||
for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok))
|
for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok))
|
||||||
{
|
{
|
||||||
token_type tok_type = static_cast<token_type>(tok_last_type(&tok));
|
token_type tok_type = static_cast<token_type>(tok_last_type(&tok));
|
||||||
const wchar_t *tok_txt = tok_last(&tok);
|
const wchar_t *tok_txt = tok_last(&tok);
|
||||||
|
@ -723,22 +808,22 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
|
||||||
size_t tok_extent = tok_get_extent(&tok);
|
size_t tok_extent = tok_get_extent(&tok);
|
||||||
assert(tok_extent < 10000000); //paranoia
|
assert(tok_extent < 10000000); //paranoia
|
||||||
|
|
||||||
parse_token_t token = parse_token_from_tokenizer_token(tok_type);
|
parse_token_t token;
|
||||||
token.tokenizer_type = tok_type;
|
token.type = parse_token_type_from_tokenizer_token(tok_type);
|
||||||
token.source_start = (size_t)tok_start;
|
token.source_start = (size_t)tok_start;
|
||||||
token.source_length = tok_extent;
|
token.source_length = tok_extent;
|
||||||
token.keyword = keyword_for_token(tok_type, tok_txt);
|
token.keyword = keyword_for_token(tok_type, tok_txt);
|
||||||
this->parser->accept_token(token, str);
|
this->parser->accept_token(token);
|
||||||
|
|
||||||
if (this->parser->fatal_errored)
|
if (this->parser->has_fatal_error())
|
||||||
{
|
{
|
||||||
if (parse_flags & parse_flag_continue_after_error)
|
if (parse_flags & parse_flag_continue_after_error)
|
||||||
{
|
{
|
||||||
/* Mark an error and then keep going */
|
/* Mark an error and then keep going */
|
||||||
token.type = parse_special_type_parse_error;
|
token.type = parse_special_type_parse_error;
|
||||||
token.keyword = parse_keyword_none;
|
token.keyword = parse_keyword_none;
|
||||||
this->parser->accept_token(token, str);
|
this->parser->accept_token(token);
|
||||||
this->parser->reset();
|
this->parser->reset_symbols();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -757,19 +842,32 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
|
||||||
fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));
|
fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (output != NULL)
|
// Acquire the output from the parser
|
||||||
{
|
this->parser->acquire_output(output, errors);
|
||||||
output->swap(this->parser->nodes);
|
|
||||||
this->parser->nodes.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (errors != NULL)
|
// Return true on success, i.e. if we did not hit a fatal error
|
||||||
{
|
return ! this->parser->has_fatal_error();
|
||||||
errors->swap(this->parser->errors);
|
}
|
||||||
this->parser->errors.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
return ! this->parser->fatal_errored;
|
bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors)
|
||||||
|
{
|
||||||
|
// Only strings can have keywords. So if we have a keyword, the type must be a string
|
||||||
|
assert(keyword == parse_keyword_none || token_type == parse_token_type_string);
|
||||||
|
|
||||||
|
parse_token_t token;
|
||||||
|
token.type = token_type;
|
||||||
|
token.keyword = keyword;
|
||||||
|
token.source_start = -1;
|
||||||
|
token.source_length = 0;
|
||||||
|
|
||||||
|
this->parser->accept_token(token);
|
||||||
|
|
||||||
|
return ! this->parser->has_fatal_error();
|
||||||
|
}
|
||||||
|
|
||||||
|
void parse_t::clear()
|
||||||
|
{
|
||||||
|
this->parser->reset_symbols_and_nodes();
|
||||||
}
|
}
|
||||||
|
|
||||||
const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const
|
const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const
|
||||||
|
|
69
parse_tree.h
69
parse_tree.h
|
@ -36,29 +36,6 @@ struct parse_error_t
|
||||||
};
|
};
|
||||||
typedef std::vector<parse_error_t> parse_error_list_t;
|
typedef std::vector<parse_error_t> parse_error_list_t;
|
||||||
|
|
||||||
enum
|
|
||||||
{
|
|
||||||
parse_flag_none = 0,
|
|
||||||
|
|
||||||
/* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
|
|
||||||
parse_flag_continue_after_error = 1 << 0,
|
|
||||||
|
|
||||||
/* Include comment tokens */
|
|
||||||
parse_flag_include_comments = 1 << 1
|
|
||||||
};
|
|
||||||
typedef unsigned int parse_tree_flags_t;
|
|
||||||
|
|
||||||
class parse_ll_t;
|
|
||||||
class parse_t
|
|
||||||
{
|
|
||||||
parse_ll_t * const parser;
|
|
||||||
|
|
||||||
public:
|
|
||||||
parse_t();
|
|
||||||
~parse_t();
|
|
||||||
bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
|
|
||||||
};
|
|
||||||
|
|
||||||
enum parse_token_type_t
|
enum parse_token_type_t
|
||||||
{
|
{
|
||||||
token_type_invalid,
|
token_type_invalid,
|
||||||
|
@ -111,6 +88,9 @@ enum parse_token_type_t
|
||||||
parse_special_type_tokenizer_error,
|
parse_special_type_tokenizer_error,
|
||||||
parse_special_type_comment,
|
parse_special_type_comment,
|
||||||
|
|
||||||
|
FIRST_TERMINAL_TYPE = parse_token_type_string,
|
||||||
|
LAST_TERMINAL_TYPE = parse_token_type_terminate,
|
||||||
|
|
||||||
LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
|
LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
|
||||||
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
|
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
|
||||||
};
|
};
|
||||||
|
@ -132,9 +112,46 @@ enum parse_keyword_t
|
||||||
parse_keyword_or,
|
parse_keyword_or,
|
||||||
parse_keyword_not,
|
parse_keyword_not,
|
||||||
parse_keyword_command,
|
parse_keyword_command,
|
||||||
parse_keyword_builtin
|
parse_keyword_builtin,
|
||||||
|
|
||||||
|
LAST_KEYWORD = parse_keyword_builtin
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
parse_flag_none = 0,
|
||||||
|
|
||||||
|
/* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
|
||||||
|
parse_flag_continue_after_error = 1 << 0,
|
||||||
|
|
||||||
|
/* Include comment tokens */
|
||||||
|
parse_flag_include_comments = 1 << 1
|
||||||
|
};
|
||||||
|
typedef unsigned int parse_tree_flags_t;
|
||||||
|
|
||||||
|
class parse_ll_t;
|
||||||
|
class parse_t
|
||||||
|
{
|
||||||
|
parse_ll_t * const parser;
|
||||||
|
|
||||||
|
public:
|
||||||
|
parse_t();
|
||||||
|
~parse_t();
|
||||||
|
|
||||||
|
/* Parse a string */
|
||||||
|
bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
|
||||||
|
|
||||||
|
/* Parse a single token */
|
||||||
|
bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
|
||||||
|
|
||||||
|
/* Reset, ready to parse something else */
|
||||||
|
void clear();
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
|
||||||
|
|
||||||
wcstring token_type_description(parse_token_type_t type);
|
wcstring token_type_description(parse_token_type_t type);
|
||||||
wcstring keyword_description(parse_keyword_t type);
|
wcstring keyword_description(parse_keyword_t type);
|
||||||
|
|
||||||
|
@ -184,7 +201,7 @@ public:
|
||||||
|
|
||||||
class parse_node_tree_t : public std::vector<parse_node_t>
|
class parse_node_tree_t : public std::vector<parse_node_t>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
|
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
|
||||||
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
|
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
|
||||||
|
@ -200,8 +217,8 @@ class parse_node_tree_t : public std::vector<parse_node_t>
|
||||||
# A job_list is a list of jobs, separated by semicolons or newlines
|
# A job_list is a list of jobs, separated by semicolons or newlines
|
||||||
|
|
||||||
job_list = <empty> |
|
job_list = <empty> |
|
||||||
<TOK_END> job_list |
|
|
||||||
job job_list
|
job job_list
|
||||||
|
<TOK_END> job_list
|
||||||
|
|
||||||
# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation
|
# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue