mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-13 13:39:02 +00:00
More work on new parser
This commit is contained in:
parent
8e07e55c1f
commit
e58b73179f
6 changed files with 532 additions and 215 deletions
|
@ -4075,7 +4075,10 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (0) {
|
||||
const wcstring dump = parse_dump_tree(parse_tree, src);
|
||||
fprintf(stderr, "%ls", dump.c_str());
|
||||
if (0)
|
||||
{
|
||||
parse_execution_context_t ctx(parse_tree, src);
|
||||
parse_execution_simulator_t sim;
|
||||
sim.context = &ctx;
|
||||
|
|
188
fish_tests.cpp
188
fish_tests.cpp
|
@ -1816,7 +1816,10 @@ static void test_new_parser_correctness(void)
|
|||
{L"if true ; end", true},
|
||||
{L"if true; end ; end", false},
|
||||
{L"if end; end ; end", false},
|
||||
{L"end", false}
|
||||
{L"if end", false},
|
||||
{L"end", false},
|
||||
{L"for i i", false},
|
||||
{L"for i in a b c ; end", true}
|
||||
};
|
||||
|
||||
for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++)
|
||||
|
@ -1837,7 +1840,87 @@ static void test_new_parser_correctness(void)
|
|||
}
|
||||
}
|
||||
say(L"Parse tests complete");
|
||||
}
|
||||
|
||||
struct parser_fuzz_token_t
|
||||
{
|
||||
parse_token_type_t token_type;
|
||||
parse_keyword_t keyword;
|
||||
|
||||
parser_fuzz_token_t() : token_type(FIRST_TERMINAL_TYPE), keyword(parse_keyword_none)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
static bool increment(std::vector<parser_fuzz_token_t> &tokens)
|
||||
{
|
||||
size_t i, end = tokens.size();
|
||||
for (i=0; i < end; i++)
|
||||
{
|
||||
bool wrapped = false;
|
||||
|
||||
struct parser_fuzz_token_t &token = tokens[i];
|
||||
bool incremented_in_keyword = false;
|
||||
if (token.token_type == parse_token_type_string)
|
||||
{
|
||||
// try incrementing the keyword
|
||||
token.keyword++;
|
||||
if (token.keyword <= LAST_KEYWORD)
|
||||
{
|
||||
incremented_in_keyword = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
token.keyword = parse_keyword_none;
|
||||
incremented_in_keyword = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (! incremented_in_keyword)
|
||||
{
|
||||
token.token_type++;
|
||||
if (token.token_type > LAST_TERMINAL_TYPE)
|
||||
{
|
||||
token.token_type = FIRST_TERMINAL_TYPE;
|
||||
wrapped = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (! wrapped)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i == end;
|
||||
}
|
||||
|
||||
static void test_new_parser_fuzzing(void)
|
||||
{
|
||||
say(L"Fuzzing parser (node size: %lu)", sizeof(parse_node_t));
|
||||
double start = timef();
|
||||
// ensure nothing crashes
|
||||
size_t max = 5;
|
||||
for (size_t len=1; len <= max; len++)
|
||||
{
|
||||
fprintf(stderr, "%lu / %lu\n", len, max);
|
||||
std::vector<parser_fuzz_token_t> tokens(len);
|
||||
do
|
||||
{
|
||||
parse_t parser;
|
||||
parse_node_tree_t parse_tree;
|
||||
parse_error_list_t errors;
|
||||
for (size_t i=0; i < len; i++)
|
||||
{
|
||||
const parser_fuzz_token_t &token = tokens[i];
|
||||
parser.parse_1_token(token.token_type, token.keyword, &parse_tree, &errors);
|
||||
}
|
||||
|
||||
// keep going until we wrap
|
||||
}
|
||||
while (! increment(tokens));
|
||||
}
|
||||
double end = timef();
|
||||
say(L"All fuzzed in %f seconds!", end - start);
|
||||
}
|
||||
|
||||
__attribute__((unused))
|
||||
|
@ -1863,6 +1946,104 @@ static void test_new_parser(void)
|
|||
}
|
||||
}
|
||||
|
||||
static void test_highlighting(void)
|
||||
{
|
||||
say(L"Testing syntax highlighting");
|
||||
if (system("mkdir -p /tmp/fish_highlight_test/")) err(L"mkdir failed");
|
||||
if (system("touch /tmp/fish_highlight_test/foo")) err(L"touch failed");
|
||||
if (system("touch /tmp/fish_highlight_test/bar")) err(L"touch failed");
|
||||
|
||||
// Here are the components of our source and the colors we expect those to be
|
||||
struct highlight_component_t {
|
||||
const wchar_t *txt;
|
||||
int color;
|
||||
};
|
||||
|
||||
const highlight_component_t components1[] =
|
||||
{
|
||||
{L"echo", HIGHLIGHT_COMMAND},
|
||||
{L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
|
||||
{L"&", HIGHLIGHT_END},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const highlight_component_t components2[] =
|
||||
{
|
||||
{L"command", HIGHLIGHT_COMMAND},
|
||||
{L"echo", HIGHLIGHT_COMMAND},
|
||||
{L"abc", HIGHLIGHT_PARAM},
|
||||
{L"/tmp/fish_highlight_test/foo", HIGHLIGHT_PARAM | HIGHLIGHT_VALID_PATH},
|
||||
{L"&", HIGHLIGHT_END},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const highlight_component_t components3[] =
|
||||
{
|
||||
{L"if command ls", HIGHLIGHT_COMMAND},
|
||||
{L"; ", HIGHLIGHT_END},
|
||||
{L"echo", HIGHLIGHT_COMMAND},
|
||||
{L"abc", HIGHLIGHT_PARAM},
|
||||
{L"; ", HIGHLIGHT_END},
|
||||
{L"/bin/definitely_not_a_command", HIGHLIGHT_ERROR},
|
||||
{L"; ", HIGHLIGHT_END},
|
||||
{L"end", HIGHLIGHT_COMMAND},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const highlight_component_t *tests[] = {components1, components2, components3};
|
||||
for (size_t which = 0; which < sizeof tests / sizeof *tests; which++)
|
||||
{
|
||||
const highlight_component_t *components = tests[which];
|
||||
// Count how many we have
|
||||
size_t component_count = 0;
|
||||
while (components[component_count].txt != NULL)
|
||||
{
|
||||
component_count++;
|
||||
}
|
||||
|
||||
// Generate the text
|
||||
wcstring text;
|
||||
std::vector<int> expected_colors;
|
||||
for (size_t i=0; i < component_count; i++)
|
||||
{
|
||||
if (i > 0)
|
||||
{
|
||||
text.push_back(L' ');
|
||||
expected_colors.push_back(0);
|
||||
}
|
||||
text.append(components[i].txt);
|
||||
|
||||
// hackish space handling
|
||||
const size_t text_len = wcslen(components[i].txt);
|
||||
for (size_t j=0; j < text_len; j++)
|
||||
{
|
||||
bool is_space = (components[i].txt[j] == L' ');
|
||||
expected_colors.push_back(is_space ? 0 : components[i].color);
|
||||
}
|
||||
}
|
||||
assert(expected_colors.size() == text.size());
|
||||
|
||||
std::vector<int> colors(text.size());
|
||||
highlight_shell(text, colors, 20, NULL, env_vars_snapshot_t());
|
||||
|
||||
if (expected_colors.size() != colors.size())
|
||||
{
|
||||
err(L"Color vector has wrong size! Expected %lu, actual %lu", expected_colors.size(), colors.size());
|
||||
}
|
||||
assert(expected_colors.size() == colors.size());
|
||||
for (size_t i=0; i < text.size(); i++)
|
||||
{
|
||||
if (expected_colors.at(i) != colors.at(i))
|
||||
{
|
||||
const wcstring spaces(i, L' ');
|
||||
err(L"Wrong color at index %lu in text (expected %d, actual %d):\n%ls\n%ls^", i, expected_colors.at(i), colors.at(i), text.c_str(), spaces.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
system("rm -Rf /tmp/fish_highlight_test");
|
||||
}
|
||||
|
||||
/**
|
||||
Main test
|
||||
*/
|
||||
|
@ -1884,9 +2065,10 @@ int main(int argc, char **argv)
|
|||
reader_init();
|
||||
env_init();
|
||||
|
||||
test_new_parser_correctness();
|
||||
//test_new_parser_fuzzing();
|
||||
//test_new_parser_correctness();
|
||||
//test_highlighting();
|
||||
//test_new_parser();
|
||||
return 0;
|
||||
|
||||
test_format();
|
||||
test_escape();
|
||||
|
|
|
@ -1314,7 +1314,8 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
|
|||
void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
|
||||
{
|
||||
ASSERT_IS_BACKGROUND_THREAD();
|
||||
if (1) {
|
||||
if (0)
|
||||
{
|
||||
highlight_shell_magic(buff, color, pos, error, vars);
|
||||
return;
|
||||
}
|
||||
|
@ -1796,7 +1797,7 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
|
|||
|
||||
switch (node.type)
|
||||
{
|
||||
// Color direct string descendants, e.g. 'for' and 'in'.
|
||||
// Color direct string descendants, e.g. 'for' and 'in'.
|
||||
case symbol_for_header:
|
||||
case symbol_while_header:
|
||||
case symbol_begin_header:
|
||||
|
@ -1810,6 +1811,20 @@ void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t
|
|||
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
|
||||
break;
|
||||
|
||||
case symbol_if_statement:
|
||||
{
|
||||
// Color the 'end'
|
||||
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
|
||||
|
||||
// Color arguments and redirections
|
||||
const parse_node_t *arguments = parse_tree.get_child(node, 3, symbol_arguments_or_redirections_list);
|
||||
if (arguments != NULL)
|
||||
{
|
||||
color_arguments(buff, parse_tree, *arguments, color);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case symbol_redirection:
|
||||
color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color);
|
||||
break;
|
||||
|
|
|
@ -249,8 +249,10 @@ RESOLVE(argument_list)
|
|||
{
|
||||
switch (token_type)
|
||||
{
|
||||
case parse_token_type_string: return 1;
|
||||
default: return 0;
|
||||
case parse_token_type_string:
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -435,33 +437,33 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n
|
|||
production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) = NULL;
|
||||
switch (node_type)
|
||||
{
|
||||
TEST(job_list)
|
||||
TEST(job)
|
||||
TEST(statement)
|
||||
TEST(job_continuation)
|
||||
TEST(boolean_statement)
|
||||
TEST(block_statement)
|
||||
TEST(if_statement)
|
||||
TEST(if_clause)
|
||||
TEST(else_clause)
|
||||
TEST(else_continuation)
|
||||
TEST(switch_statement)
|
||||
TEST(decorated_statement)
|
||||
TEST(case_item_list)
|
||||
TEST(case_item)
|
||||
TEST(argument_list_nonempty)
|
||||
TEST(argument_list)
|
||||
TEST(block_header)
|
||||
TEST(for_header)
|
||||
TEST(while_header)
|
||||
TEST(begin_header)
|
||||
TEST(function_header)
|
||||
TEST(plain_statement)
|
||||
TEST(arguments_or_redirections_list)
|
||||
TEST(argument_or_redirection)
|
||||
TEST(argument)
|
||||
TEST(redirection)
|
||||
TEST(optional_background)
|
||||
TEST(job_list)
|
||||
TEST(job)
|
||||
TEST(statement)
|
||||
TEST(job_continuation)
|
||||
TEST(boolean_statement)
|
||||
TEST(block_statement)
|
||||
TEST(if_statement)
|
||||
TEST(if_clause)
|
||||
TEST(else_clause)
|
||||
TEST(else_continuation)
|
||||
TEST(switch_statement)
|
||||
TEST(decorated_statement)
|
||||
TEST(case_item_list)
|
||||
TEST(case_item)
|
||||
TEST(argument_list_nonempty)
|
||||
TEST(argument_list)
|
||||
TEST(block_header)
|
||||
TEST(for_header)
|
||||
TEST(while_header)
|
||||
TEST(begin_header)
|
||||
TEST(function_header)
|
||||
TEST(plain_statement)
|
||||
TEST(arguments_or_redirections_list)
|
||||
TEST(argument_or_redirection)
|
||||
TEST(argument)
|
||||
TEST(redirection)
|
||||
TEST(optional_background)
|
||||
|
||||
case parse_token_type_string:
|
||||
case parse_token_type_pipe:
|
||||
|
|
268
parse_tree.cpp
268
parse_tree.cpp
|
@ -4,6 +4,7 @@
|
|||
|
||||
using namespace parse_productions;
|
||||
|
||||
/** Returns a string description of this parse error */
|
||||
wcstring parse_error_t::describe(const wcstring &src) const
|
||||
{
|
||||
wcstring result = text;
|
||||
|
@ -41,6 +42,7 @@ wcstring parse_error_t::describe(const wcstring &src) const
|
|||
return result;
|
||||
}
|
||||
|
||||
/** Returns a string description of the given token type */
|
||||
wcstring token_type_description(parse_token_type_t type)
|
||||
{
|
||||
switch (type)
|
||||
|
@ -172,54 +174,52 @@ wcstring keyword_description(parse_keyword_t k)
|
|||
}
|
||||
}
|
||||
|
||||
/** Returns a string description of the given parse node */
|
||||
wcstring parse_node_t::describe(void) const
|
||||
{
|
||||
wcstring result = token_type_description(type);
|
||||
return result;
|
||||
}
|
||||
|
||||
/** A struct representing a token that is passed to the parser */
|
||||
struct parse_token_t
|
||||
{
|
||||
enum parse_token_type_t type; // The type of the token as represented by the parser
|
||||
enum token_type tokenizer_type; // The type of the token as represented by the tokenizer
|
||||
enum parse_keyword_t keyword; // Any keyword represented by this parser
|
||||
size_t source_start;
|
||||
size_t source_length;
|
||||
|
||||
wcstring describe() const;
|
||||
wcstring describe() const
|
||||
{
|
||||
wcstring result = token_type_description(type);
|
||||
if (keyword != parse_keyword_none)
|
||||
{
|
||||
append_format(result, L" <%ls>", keyword_description(keyword).c_str());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
wcstring parse_token_t::describe(void) const
|
||||
/* Convert from tokenizer_t's token type to a parse_token_t type */
|
||||
static parse_token_type_t parse_token_type_from_tokenizer_token(enum token_type tokenizer_token_type)
|
||||
{
|
||||
wcstring result = token_type_description(type);
|
||||
if (keyword != parse_keyword_none)
|
||||
{
|
||||
append_format(result, L" <%ls>", keyword_description(keyword).c_str());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Convert from tokenizer_t's token type to our token
|
||||
static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_token_type)
|
||||
{
|
||||
parse_token_t result = {};
|
||||
result.tokenizer_type = tokenizer_token_type;
|
||||
parse_token_type_t result = token_type_invalid;
|
||||
switch (tokenizer_token_type)
|
||||
{
|
||||
case TOK_STRING:
|
||||
result.type = parse_token_type_string;
|
||||
result = parse_token_type_string;
|
||||
break;
|
||||
|
||||
case TOK_PIPE:
|
||||
result.type = parse_token_type_pipe;
|
||||
result = parse_token_type_pipe;
|
||||
break;
|
||||
|
||||
case TOK_END:
|
||||
result.type = parse_token_type_end;
|
||||
result = parse_token_type_end;
|
||||
break;
|
||||
|
||||
case TOK_BACKGROUND:
|
||||
result.type = parse_token_type_background;
|
||||
result = parse_token_type_background;
|
||||
break;
|
||||
|
||||
case TOK_REDIRECT_OUT:
|
||||
|
@ -227,15 +227,15 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
|
|||
case TOK_REDIRECT_IN:
|
||||
case TOK_REDIRECT_FD:
|
||||
case TOK_REDIRECT_NOCLOB:
|
||||
result.type = parse_token_type_redirection;
|
||||
result = parse_token_type_redirection;
|
||||
break;
|
||||
|
||||
case TOK_ERROR:
|
||||
result.type = parse_special_type_tokenizer_error;
|
||||
result = parse_special_type_tokenizer_error;
|
||||
break;
|
||||
|
||||
case TOK_COMMENT:
|
||||
result.type = parse_special_type_comment;
|
||||
result = parse_special_type_comment;
|
||||
break;
|
||||
|
||||
|
||||
|
@ -247,6 +247,7 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
|
|||
return result;
|
||||
}
|
||||
|
||||
/* Helper function for dump_tree */
|
||||
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line)
|
||||
{
|
||||
assert(start < nodes.size());
|
||||
|
@ -288,8 +289,8 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
|
|||
}
|
||||
}
|
||||
|
||||
__attribute__((unused))
|
||||
static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
|
||||
/* Gives a debugging textual description of a parse tree */
|
||||
wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
|
||||
{
|
||||
if (nodes.empty())
|
||||
return L"(empty!)";
|
||||
|
@ -300,6 +301,7 @@ static wcstring dump_tree(const parse_node_tree_t &nodes, const wcstring &src)
|
|||
return result;
|
||||
}
|
||||
|
||||
/* Struct representing elements of the symbol stack, used in the internal state of the LL parser */
|
||||
struct parse_stack_element_t
|
||||
{
|
||||
enum parse_token_type_t type;
|
||||
|
@ -323,31 +325,25 @@ struct parse_stack_element_t
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/* The parser itself, private implementation of class parse_t. This is a hand-coded table-driven LL parser. Most hand-coded LL parsers are recursive descent, but recursive descent parsers are difficult to "pause", unlike table-driven parsers. */
|
||||
class parse_ll_t
|
||||
{
|
||||
friend class parse_t;
|
||||
/* Traditional symbol stack of the LL parser */
|
||||
std::vector<parse_stack_element_t> symbol_stack;
|
||||
|
||||
std::vector<parse_stack_element_t> symbol_stack; // LL parser stack
|
||||
/* Parser output. This is a parse tree, but stored in an array. */
|
||||
parse_node_tree_t nodes;
|
||||
|
||||
/* Whether we ran into a fatal error, including parse errors or tokenizer errors */
|
||||
bool fatal_errored;
|
||||
|
||||
/* List of errors we have encountered */
|
||||
parse_error_list_t errors;
|
||||
|
||||
// Constructor
|
||||
parse_ll_t() : fatal_errored(false)
|
||||
{
|
||||
this->reset();
|
||||
}
|
||||
|
||||
bool top_node_match_token(parse_token_t token);
|
||||
|
||||
void accept_token(parse_token_t token, const wcstring &src);
|
||||
|
||||
// Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node.
|
||||
void reset(void);
|
||||
/* The symbol stack can contain terminal types or symbols. Symbols go on to do productions, but terminal types are just matched against input tokens. */
|
||||
bool top_node_handle_terminal_types(parse_token_t token);
|
||||
|
||||
void parse_error(const wchar_t *expected, parse_token_t token);
|
||||
void parse_error(parse_token_t token, const wchar_t *format, ...);
|
||||
|
@ -355,9 +351,6 @@ class parse_ll_t
|
|||
|
||||
void dump_stack(void) const;
|
||||
|
||||
// Figure out the ranges of intermediate nodes
|
||||
void determine_node_ranges();
|
||||
|
||||
// Get the node corresponding to the top element of the stack
|
||||
parse_node_t &node_for_top_symbol()
|
||||
{
|
||||
|
@ -420,12 +413,17 @@ class parse_ll_t
|
|||
for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++)
|
||||
{
|
||||
production_element_t elem = (*production)[i];
|
||||
if (production_element_is_valid(elem))
|
||||
if (!production_element_is_valid(elem))
|
||||
{
|
||||
// All done, bail out
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Generate the parse node. Note that this push_back may invalidate node.
|
||||
parse_token_type_t child_type = production_element_type(elem);
|
||||
nodes.push_back(parse_node_t(child_type));
|
||||
child_count++;
|
||||
parse_token_type_t child_type = production_element_type(elem);
|
||||
nodes.push_back(parse_node_t(child_type));
|
||||
child_count++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -452,6 +450,36 @@ class parse_ll_t
|
|||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/* Constructor */
|
||||
parse_ll_t() : fatal_errored(false)
|
||||
{
|
||||
this->symbol_stack.reserve(16);
|
||||
this->nodes.reserve(64);
|
||||
this->reset_symbols_and_nodes();
|
||||
}
|
||||
|
||||
/* Input */
|
||||
void accept_token(parse_token_t token);
|
||||
|
||||
/* Indicate if we hit a fatal error */
|
||||
bool has_fatal_error(void) const
|
||||
{
|
||||
return this->fatal_errored;
|
||||
}
|
||||
|
||||
/* Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node. This is called from the constructor */
|
||||
void reset_symbols(void);
|
||||
|
||||
/* Clear the parse symbol stack and the node tree. Add a new job_list_t goal node. This is called from the constructor. */
|
||||
void reset_symbols_and_nodes(void);
|
||||
|
||||
/* Once parsing is complete, determine the ranges of intermediate nodes */
|
||||
void determine_node_ranges();
|
||||
|
||||
/* Acquire output after parsing. This transfers directly from within self */
|
||||
void acquire_output(parse_node_tree_t *output, parse_error_list_t *errors);
|
||||
};
|
||||
|
||||
void parse_ll_t::dump_stack(void) const
|
||||
|
@ -509,7 +537,8 @@ void parse_ll_t::determine_node_ranges(void)
|
|||
max_end = std::max(max_end, child.source_start + child.source_length);
|
||||
}
|
||||
|
||||
if (min_start != source_start_invalid) {
|
||||
if (min_start != source_start_invalid)
|
||||
{
|
||||
assert(max_end >= min_start);
|
||||
parent->source_start = min_start;
|
||||
parent->source_length = max_end - min_start;
|
||||
|
@ -517,6 +546,22 @@ void parse_ll_t::determine_node_ranges(void)
|
|||
}
|
||||
}
|
||||
|
||||
/* Transfer the parse results out of this parser. Either destination may be NULL, in which case
   the corresponding result is simply dropped. Afterwards the node tree, the error list and the
   symbol stack of this parser are all empty. */
void parse_ll_t::acquire_output(parse_node_tree_t *output, parse_error_list_t *errors)
{
    /* Trade contents with the caller's tree, if one was supplied */
    if (output)
    {
        output->swap(this->nodes);
    }

    /* Same for the errors. Note that the parameter shadows the member, hence the explicit this-> */
    if (errors)
    {
        errors->swap(this->errors);
    }

    /* Discard whatever we hold now: either the caller's old contents or our unclaimed results */
    this->nodes.clear();
    this->errors.clear();
    this->symbol_stack.clear();
}
|
||||
|
||||
void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
|
||||
{
|
||||
//this->dump_stack();
|
||||
|
@ -545,9 +590,9 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
|
|||
fatal_errored = true;
|
||||
}
|
||||
|
||||
void parse_ll_t::reset(void)
|
||||
void parse_ll_t::reset_symbols(void)
|
||||
{
|
||||
// add a new job_list node and then reset our symbol list to point at it
|
||||
/* Add a new job_list node, and then reset our symbol list to point at it */
|
||||
node_offset_t where = nodes.size();
|
||||
nodes.push_back(parse_node_t(symbol_job_list));
|
||||
|
||||
|
@ -556,8 +601,31 @@ void parse_ll_t::reset(void)
|
|||
this->fatal_errored = false;
|
||||
}
|
||||
|
||||
/* Reset both symbols and nodes */
|
||||
void parse_ll_t::reset_symbols_and_nodes(void)
|
||||
{
|
||||
nodes.clear();
|
||||
this->reset_symbols();
|
||||
}
|
||||
|
||||
bool parse_ll_t::top_node_match_token(parse_token_t token)
|
||||
/* Returns whether the given type is one of the terminal token types (string, pipe, redirection,
   background, end or terminate). Every other type yields false. */
static bool type_is_terminal_type(parse_token_type_t type)
{
    return type == parse_token_type_string
           || type == parse_token_type_pipe
           || type == parse_token_type_redirection
           || type == parse_token_type_background
           || type == parse_token_type_end
           || type == parse_token_type_terminate;
}
|
||||
|
||||
bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token)
|
||||
{
|
||||
if (symbol_stack.empty())
|
||||
{
|
||||
|
@ -568,38 +636,55 @@ bool parse_ll_t::top_node_match_token(parse_token_t token)
|
|||
|
||||
PARSE_ASSERT(! symbol_stack.empty());
|
||||
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
|
||||
bool result = false;
|
||||
bool handled = false;
|
||||
parse_stack_element_t &stack_top = symbol_stack.back();
|
||||
if (stack_top.type == token.type)
|
||||
if (type_is_terminal_type(stack_top.type))
|
||||
{
|
||||
// So far so good. See if we need a particular keyword.
|
||||
if (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword)
|
||||
// The top of the stack is terminal. We are going to handle this (because we can't produce from a terminal type)
|
||||
handled = true;
|
||||
|
||||
// Now see if we actually matched
|
||||
bool matched = false;
|
||||
if (stack_top.type == token.type)
|
||||
{
|
||||
switch (stack_top.type)
|
||||
{
|
||||
case parse_token_type_string:
|
||||
// We matched if the keywords match, or no keyword was required
|
||||
matched = (stack_top.keyword == parse_keyword_none || stack_top.keyword == token.keyword);
|
||||
break;
|
||||
|
||||
default:
|
||||
// For other types, we only require that the types match
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (matched)
|
||||
{
|
||||
// Success. Tell the node that it matched this token
|
||||
parse_node_t &node = node_for_top_symbol();
|
||||
node.source_start = token.source_start;
|
||||
node.source_length = token.source_length;
|
||||
|
||||
// We consumed this symbol
|
||||
symbol_stack.pop_back();
|
||||
result = true;
|
||||
}
|
||||
else if (token.type == parse_token_type_pipe)
|
||||
else
|
||||
{
|
||||
// Pipes are primitive
|
||||
symbol_stack.pop_back();
|
||||
result = true;
|
||||
// Failure
|
||||
this->fatal_errored = true;
|
||||
}
|
||||
|
||||
// We handled the token, so pop the symbol stack
|
||||
symbol_stack.pop_back();
|
||||
}
|
||||
return result;
|
||||
return handled;
|
||||
}
|
||||
|
||||
void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
|
||||
void parse_ll_t::accept_token(parse_token_t token)
|
||||
{
|
||||
bool logit = false;
|
||||
if (logit)
|
||||
{
|
||||
const wcstring txt = wcstring(src, token.source_start, token.source_length);
|
||||
fprintf(stderr, "Accept token %ls\n", token.describe().c_str());
|
||||
}
|
||||
PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
|
||||
|
@ -620,7 +705,7 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
|
|||
{
|
||||
PARSE_ASSERT(! symbol_stack.empty());
|
||||
|
||||
if (top_node_match_token(token))
|
||||
if (top_node_handle_terminal_types(token))
|
||||
{
|
||||
if (logit)
|
||||
{
|
||||
|
@ -715,7 +800,7 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
|
|||
tok_options |= TOK_SHOW_COMMENTS;
|
||||
|
||||
tokenizer_t tok = tokenizer_t(str.c_str(), tok_options);
|
||||
for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok))
|
||||
for (; tok_has_next(&tok) && ! this->parser->has_fatal_error(); tok_next(&tok))
|
||||
{
|
||||
token_type tok_type = static_cast<token_type>(tok_last_type(&tok));
|
||||
const wchar_t *tok_txt = tok_last(&tok);
|
||||
|
@ -723,22 +808,22 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
|
|||
size_t tok_extent = tok_get_extent(&tok);
|
||||
assert(tok_extent < 10000000); //paranoia
|
||||
|
||||
parse_token_t token = parse_token_from_tokenizer_token(tok_type);
|
||||
token.tokenizer_type = tok_type;
|
||||
parse_token_t token;
|
||||
token.type = parse_token_type_from_tokenizer_token(tok_type);
|
||||
token.source_start = (size_t)tok_start;
|
||||
token.source_length = tok_extent;
|
||||
token.keyword = keyword_for_token(tok_type, tok_txt);
|
||||
this->parser->accept_token(token, str);
|
||||
this->parser->accept_token(token);
|
||||
|
||||
if (this->parser->fatal_errored)
|
||||
if (this->parser->has_fatal_error())
|
||||
{
|
||||
if (parse_flags & parse_flag_continue_after_error)
|
||||
{
|
||||
/* Mark an error and then keep going */
|
||||
token.type = parse_special_type_parse_error;
|
||||
token.keyword = parse_keyword_none;
|
||||
this->parser->accept_token(token, str);
|
||||
this->parser->reset();
|
||||
this->parser->accept_token(token);
|
||||
this->parser->reset_symbols();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -757,19 +842,32 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
|
|||
fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));
|
||||
#endif
|
||||
|
||||
if (output != NULL)
|
||||
{
|
||||
output->swap(this->parser->nodes);
|
||||
this->parser->nodes.clear();
|
||||
}
|
||||
// Acquire the output from the parser
|
||||
this->parser->acquire_output(output, errors);
|
||||
|
||||
if (errors != NULL)
|
||||
{
|
||||
errors->swap(this->parser->errors);
|
||||
this->parser->errors.clear();
|
||||
}
|
||||
// Indicate if we had a fatal error
|
||||
return ! this->parser->has_fatal_error();
|
||||
}
|
||||
|
||||
return ! this->parser->fatal_errored;
|
||||
/* Feed one synthetic token, which has no source text behind it, to the underlying parser.
   Returns true as long as the parser has not hit a fatal error. The output and errors parameters
   are not touched by this function. */
bool parse_t::parse_1_token(parse_token_type_t token_type, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors)
{
    // Only strings can have keywords. So if we have a keyword, the type must be a string
    assert(keyword == parse_keyword_none || token_type == parse_token_type_string);

    // Build a token with an empty source range
    parse_token_t token;
    token.source_length = 0;
    token.source_start = -1;
    token.keyword = keyword;
    token.type = token_type;

    parse_ll_t * const ll = this->parser;
    ll->accept_token(token);

    const bool errored = ll->has_fatal_error();
    return ! errored;
}
|
||||
|
||||
void parse_t::clear()
|
||||
{
|
||||
this->parser->reset_symbols_and_nodes();
|
||||
}
|
||||
|
||||
const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const
|
||||
|
|
69
parse_tree.h
69
parse_tree.h
|
@ -36,29 +36,6 @@ struct parse_error_t
|
|||
};
|
||||
typedef std::vector<parse_error_t> parse_error_list_t;
|
||||
|
||||
enum
|
||||
{
|
||||
parse_flag_none = 0,
|
||||
|
||||
/* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
|
||||
parse_flag_continue_after_error = 1 << 0,
|
||||
|
||||
/* Include comment tokens */
|
||||
parse_flag_include_comments = 1 << 1
|
||||
};
|
||||
typedef unsigned int parse_tree_flags_t;
|
||||
|
||||
class parse_ll_t;
|
||||
class parse_t
|
||||
{
|
||||
parse_ll_t * const parser;
|
||||
|
||||
public:
|
||||
parse_t();
|
||||
~parse_t();
|
||||
bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
|
||||
};
|
||||
|
||||
enum parse_token_type_t
|
||||
{
|
||||
token_type_invalid,
|
||||
|
@ -111,6 +88,9 @@ enum parse_token_type_t
|
|||
parse_special_type_tokenizer_error,
|
||||
parse_special_type_comment,
|
||||
|
||||
FIRST_TERMINAL_TYPE = parse_token_type_string,
|
||||
LAST_TERMINAL_TYPE = parse_token_type_terminate,
|
||||
|
||||
LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
|
||||
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
|
||||
};
|
||||
|
@ -132,9 +112,46 @@ enum parse_keyword_t
|
|||
parse_keyword_or,
|
||||
parse_keyword_not,
|
||||
parse_keyword_command,
|
||||
parse_keyword_builtin
|
||||
parse_keyword_builtin,
|
||||
|
||||
LAST_KEYWORD = parse_keyword_builtin
|
||||
};
|
||||
|
||||
|
||||
/* Flags controlling a parse. They are bit values meant to be combined into a parse_tree_flags_t. */
enum
{
    /* No special behavior */
    parse_flag_none = 0,

    /* Keep building a "parse tree" even after an error. The result may be a 'forest' of disconnected trees. Intended for syntax highlighting. */
    parse_flag_continue_after_error = 1 << 0,

    /* Include comment tokens */
    parse_flag_include_comments = 1 << 1
};

/* A combination of the flags above */
typedef unsigned int parse_tree_flags_t;
|
||||
|
||||
class parse_ll_t;
|
||||
class parse_t
|
||||
{
|
||||
parse_ll_t * const parser;
|
||||
|
||||
public:
|
||||
parse_t();
|
||||
~parse_t();
|
||||
|
||||
/* Parse a string */
|
||||
bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
|
||||
|
||||
/* Parse a single token */
|
||||
bool parse_1_token(parse_token_type_t token, parse_keyword_t keyword, parse_node_tree_t *output, parse_error_list_t *errors);
|
||||
|
||||
/* Reset, ready to parse something else */
|
||||
void clear();
|
||||
|
||||
};
|
||||
|
||||
wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
|
||||
|
||||
wcstring token_type_description(parse_token_type_t type);
|
||||
wcstring keyword_description(parse_keyword_t type);
|
||||
|
||||
|
@ -184,7 +201,7 @@ public:
|
|||
|
||||
class parse_node_tree_t : public std::vector<parse_node_t>
|
||||
{
|
||||
public:
|
||||
public:
|
||||
|
||||
/* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
|
||||
const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
|
||||
|
@ -200,8 +217,8 @@ class parse_node_tree_t : public std::vector<parse_node_t>
|
|||
# A job_list is a list of jobs, separated by semicolons or newlines
|
||||
|
||||
job_list = <empty> |
|
||||
<TOK_END> job_list |
|
||||
job job_list
|
||||
<TOK_END> job_list
|
||||
|
||||
# A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases like if statements, where we require a command). To represent "non-empty", we require a statement, followed by a possibly empty job_continuation
|
||||
|
||||
|
|
Loading…
Reference in a new issue