More work on new parser
This commit is contained in:
parent 6a6593335d
commit 8e07e55c1f

9 changed files with 708 additions and 32 deletions
@@ -4063,7 +4063,7 @@ int builtin_parse(parser_t &parser, wchar_t **argv)
     parse_node_tree_t parse_tree;
     parse_error_list_t errors;
     parse_t parser;
-    bool success = parser.parse(src, &parse_tree, &errors);
+    bool success = parser.parse(src, parse_flag_none, &parse_tree, &errors, true);
     if (! success)
     {
         stdout_buffer.append(L"Parsing failed:\n");
@@ -507,7 +507,7 @@ const wchar_t *wcsfuncname(const wchar_t *str)
 }
 
 
-int wcsvarchr(wchar_t chr)
+bool wcsvarchr(wchar_t chr)
 {
     return iswalnum(chr) || chr == L'_';
 }
common.h (4 changed lines)

@@ -608,10 +608,10 @@ const wchar_t *wcsfuncname(const wchar_t *str);
 /**
    Test if the given string is valid in a variable name
 
-   \return 1 if this is a valid name, 0 otherwise
+   \return true if this is a valid name, false otherwise
 */
 
-int wcsvarchr(wchar_t chr);
+bool wcsvarchr(wchar_t chr);
 
 
 /**
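Since wcsvarchr() now returns a bool, callers can use it directly as a predicate. A minimal usage sketch (the helper below is illustrative only and not part of this commit):

    // Illustrative helper (not in this commit): scan forward from 'start' while
    // characters are valid in a variable name, using the new bool wcsvarchr().
    static size_t variable_name_end(const wcstring &s, size_t start)
    {
        size_t i = start;
        while (i < s.size() && wcsvarchr(s.at(i)))
            i++;
        return i;
    }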
@@ -1801,23 +1801,65 @@ void history_tests_t::test_history_speed(void)
     delete hist;
 }
 
+static void test_new_parser_correctness(void)
+{
+    say(L"Testing new parser!");
+    const struct parser_test_t
+    {
+        const wchar_t *src;
+        bool ok;
+    }
+    parser_tests[] =
+    {
+        {L"; ; ; ", true},
+        {L"if ; end", false},
+        {L"if true ; end", true},
+        {L"if true; end ; end", false},
+        {L"if end; end ; end", false},
+        {L"end", false}
+    };
+
+    for (size_t i=0; i < sizeof parser_tests / sizeof *parser_tests; i++)
+    {
+        const parser_test_t *test = &parser_tests[i];
+
+        parse_node_tree_t parse_tree;
+        parse_t parser;
+        bool success = parser.parse(test->src, parse_flag_none, &parse_tree, NULL);
+        say(L"%lu / %lu: Parse \"%ls\": %s", i+1, sizeof parser_tests / sizeof *parser_tests, test->src, success ? "yes" : "no");
+        if (success && ! test->ok)
+        {
+            err(L"\"%ls\" should NOT have parsed, but did", test->src);
+        }
+        else if (! success && test->ok)
+        {
+            err(L"\"%ls\" should have parsed, but failed", test->src);
+        }
+    }
+    say(L"Parse tests complete");
+
+}
+
+__attribute__((unused))
 static void test_new_parser(void)
 {
     say(L"Testing new parser!");
     const wcstring src = L"echo hello world";
     parse_node_tree_t parse_tree;
     parse_t parser;
-    bool success = parser.parse(src, &parse_tree, NULL);
+    bool success = parser.parse(src, parse_flag_none, &parse_tree, NULL);
     if (! success)
     {
         say(L"Parsing failed");
     }
     else
     {
+#if 0
         parse_execution_context_t ctx(parse_tree, src);
         say(L"Simulating execution:");
         wcstring simulation = ctx.simulate();
         say(simulation.c_str());
+#endif
     }
 }
 
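The tests above pass NULL for the error list. A variant that collects error details would look roughly like this (a sketch only; the exact fields of parse_error_t are declared in parse_tree.h and not reproduced in this diff):

    // Sketch: the same kind of parse call, but collecting error descriptions.
    parse_node_tree_t parse_tree;
    parse_error_list_t errors;
    parse_t parser;
    bool success = parser.parse(L"if ; end", parse_flag_none, &parse_tree, &errors);
    if (! success)
    {
        // 'errors' now holds one parse_error_t per reported problem.
    }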
@@ -1827,13 +1869,12 @@ static void test_new_parser(void)
 int main(int argc, char **argv)
 {
     setlocale(LC_ALL, "");
-    srand(time(0));
+    //srand(time(0));
     configure_thread_assertions_for_testing();
 
     program_name=L"(ignore)";
 
     say(L"Testing low-level functionality");
     say(L"Lines beginning with '(ignore):' are not errors, they are warning messages\ngenerated by the fish parser library when given broken input, and can be\nignored. All actual errors begin with 'Error:'.");
     set_main_thread();
     setup_fork_guards();
     //proc_init();

@@ -1843,7 +1884,8 @@ int main(int argc, char **argv)
     reader_init();
     env_init();
 
-    test_new_parser();
+    test_new_parser_correctness();
+    //test_new_parser();
     return 0;
 
     test_format();
highlight.cpp (412 changed lines)

@@ -34,6 +34,7 @@
 #include "wildcard.h"
 #include "path.h"
 #include "history.h"
+#include "parse_tree.h"
 
 /**
    Number of elements in the highlight_var array
 
@@ -1307,11 +1308,16 @@ static void tokenize(const wchar_t * const buff, std::vector<int> &color, const
     }
 }
 
+void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars);
 
 // PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread
 void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
 {
     ASSERT_IS_BACKGROUND_THREAD();
+    if (1) {
+        highlight_shell_magic(buff, color, pos, error, vars);
+        return;
+    }
 
     const size_t length = buff.size();
     assert(buff.size() == color.size());
@@ -1440,7 +1446,413 @@ void highlight_shell(const wcstring &buff, std::vector<int> &color, size_t pos,
     }
 }
 
+static void color_node(const parse_node_t &node, int color, std::vector<int> &color_array)
+{
+    // Can only color nodes with valid source ranges
+    if (! node.has_source())
+        return;
+
+    // Fill the color array with our color in the corresponding range
+    size_t source_end = node.source_start + node.source_length;
+    assert(source_end >= node.source_start);
+    assert(source_end <= color_array.size());
+
+    std::fill(color_array.begin() + node.source_start, color_array.begin() + source_end, color);
+}
+
+static void color_argument(const wcstring &buffstr, std::vector<int>::iterator colors, int normal_status)
+{
+    const size_t buff_len = buffstr.size();
+    std::fill(colors, colors + buff_len, normal_status);
+
+    enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted;
+    int bracket_count=0;
+    for (size_t in_pos=0; in_pos < buff_len; in_pos++)
+    {
+        const wchar_t c = buffstr.at(in_pos);
+        switch (mode)
+        {
+            case e_unquoted:
+            {
+                if (c == L'\\')
+                {
+                    int fill_color = HIGHLIGHT_ESCAPE; //may be set to HIGHLIGHT_ERROR
+                    const size_t backslash_pos = in_pos;
+                    size_t fill_end = backslash_pos;
+
+                    // Move to the escaped character
+                    in_pos++;
+                    const wchar_t escaped_char = (in_pos < buff_len ? buffstr.at(in_pos) : L'\0');
+
+                    if (escaped_char == L'\0')
+                    {
+                        fill_end = in_pos;
+                        fill_color = HIGHLIGHT_ERROR;
+                    }
+                    else if (wcschr(L"~%", escaped_char))
+                    {
+                        if (in_pos == 1)
+                        {
+                            fill_end = in_pos + 1;
+                        }
+                    }
+                    else if (escaped_char == L',')
+                    {
+                        if (bracket_count)
+                        {
+                            fill_end = in_pos + 1;
+                        }
+                    }
+                    else if (wcschr(L"abefnrtv*?$(){}[]'\"<>^ \\#;|&", escaped_char))
+                    {
+                        fill_end = in_pos + 1;
+                    }
+                    else if (wcschr(L"c", escaped_char))
+                    {
+                        // Like \ci. So highlight three characters
+                        fill_end = in_pos + 1;
+                    }
+                    else if (wcschr(L"uUxX01234567", escaped_char))
+                    {
+                        long long res=0;
+                        int chars=2;
+                        int base=16;
+
+                        wchar_t max_val = ASCII_MAX;
+
+                        switch (escaped_char)
+                        {
+                            case L'u':
+                            {
+                                chars=4;
+                                max_val = UCS2_MAX;
+                                in_pos++;
+                                break;
+                            }
+
+                            case L'U':
+                            {
+                                chars=8;
+                                max_val = WCHAR_MAX;
+                                in_pos++;
+                                break;
+                            }
+
+                            case L'x':
+                            {
+                                in_pos++;
+                                break;
+                            }
+
+                            case L'X':
+                            {
+                                max_val = BYTE_MAX;
+                                in_pos++;
+                                break;
+                            }
+
+                            default:
+                            {
+                                // a digit like \12
+                                base=8;
+                                chars=3;
+                                break;
+                            }
+                        }
+
+                        // Consume
+                        for (int i=0; i < chars && in_pos < buff_len; i++)
+                        {
+                            long d = convert_digit(buffstr.at(in_pos), base);
+                            if (d < 0)
+                                break;
+                            res = (res * base) + d;
+                            in_pos++;
+                        }
+                        //in_pos is now at the first character that could not be converted (or buff_len)
+                        assert(in_pos >= backslash_pos && in_pos <= buff_len);
+                        fill_end = in_pos;
+
+                        // It's an error if we exceeded the max value
+                        if (res > max_val)
+                            fill_color = HIGHLIGHT_ERROR;
+
+                        // Subtract one from in_pos, so that the increment in the loop will move to the next character
+                        in_pos--;
+                    }
+                    assert(fill_end >= backslash_pos);
+                    std::fill(colors + backslash_pos, colors + fill_end, fill_color);
+                }
+                else
+                {
+                    // Not a backslash
+                    switch (c)
+                    {
+                        case L'~':
+                        case L'%':
+                        {
+                            if (in_pos == 0)
+                            {
+                                colors[in_pos] = HIGHLIGHT_OPERATOR;
+                            }
+                            break;
+                        }
+
+                        case L'$':
+                        {
+                            assert(in_pos < buff_len);
+                            int dollar_color = HIGHLIGHT_ERROR;
+                            if (in_pos + 1 < buff_len)
+                            {
+                                wchar_t next = buffstr.at(in_pos + 1);
+                                if (next == L'$' || wcsvarchr(next))
+                                    dollar_color = HIGHLIGHT_OPERATOR;
+                            }
+                            colors[in_pos] = dollar_color;
+                            break;
+                        }
+
+
+                        case L'*':
+                        case L'?':
+                        case L'(':
+                        case L')':
+                        {
+                            colors[in_pos] = HIGHLIGHT_OPERATOR;
+                            break;
+                        }
+
+                        case L'{':
+                        {
+                            colors[in_pos] = HIGHLIGHT_OPERATOR;
+                            bracket_count++;
+                            break;
+                        }
+
+                        case L'}':
+                        {
+                            colors[in_pos] = HIGHLIGHT_OPERATOR;
+                            bracket_count--;
+                            break;
+                        }
+
+                        case L',':
+                        {
+                            if (bracket_count > 0)
+                            {
+                                colors[in_pos] = HIGHLIGHT_OPERATOR;
+                            }
+
+                            break;
+                        }
+
+                        case L'\'':
+                        {
+                            colors[in_pos] = HIGHLIGHT_QUOTE;
+                            mode = e_single_quoted;
+                            break;
+                        }
+
+                        case L'\"':
+                        {
+                            colors[in_pos] = HIGHLIGHT_QUOTE;
+                            mode = e_double_quoted;
+                            break;
+                        }
+
+                    }
+                }
+                break;
+            }
+
+            /*
+               Mode 1 means single quoted string, i.e 'foo'
+            */
+            case e_single_quoted:
+            {
+                colors[in_pos] = HIGHLIGHT_QUOTE;
+                if (c == L'\\')
+                {
+                    // backslash
+                    if (in_pos + 1 < buff_len)
+                    {
+                        const wchar_t escaped_char = buffstr.at(in_pos + 1);
+                        if (escaped_char == L'\\' || escaped_char == L'\'')
+                        {
+                            colors[in_pos] = HIGHLIGHT_ESCAPE; //backslash
+                            colors[in_pos + 1] = HIGHLIGHT_ESCAPE; //escaped char
+                            in_pos += 1; //skip over backslash
+                        }
+                    }
+                }
+                else if (c == L'\'')
+                {
+                    mode = e_unquoted;
+                }
+                break;
+            }
+
+            /*
+               Mode 2 means double quoted string, i.e. "foo"
+            */
+            case e_double_quoted:
+            {
+                colors[in_pos] = HIGHLIGHT_QUOTE;
+                switch (c)
+                {
+                    case L'"':
+                    {
+                        mode = e_unquoted;
+                        break;
+                    }
+
+                    case L'\\':
+                    {
+                        // backslash
+                        if (in_pos + 1 < buff_len)
+                        {
+                            const wchar_t escaped_char = buffstr.at(in_pos + 1);
+                            if (escaped_char == L'\\' || escaped_char == L'\'' || escaped_char == L'$')
+                            {
+                                colors[in_pos] = HIGHLIGHT_ESCAPE; //backslash
+                                colors[in_pos + 1] = HIGHLIGHT_ESCAPE; //escaped char
+                                in_pos += 1; //skip over backslash
+                            }
+                        }
+                        break;
+                    }
+
+                    case L'$':
+                    {
+                        int dollar_color = HIGHLIGHT_ERROR;
+                        if (in_pos + 1 < buff_len)
+                        {
+                            wchar_t next = buffstr.at(in_pos + 1);
+                            if (next == L'$' || wcsvarchr(next))
+                                dollar_color = HIGHLIGHT_OPERATOR;
+                        }
+                        colors[in_pos] = dollar_color;
+                        break;
+                    }
+
+                }
+                break;
+            }
+        }
+    }
+}
+
+// Color all of the arguments of the given command
+static void color_arguments(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &parent, std::vector<int> &color_array)
+{
+    const parse_node_tree_t::parse_node_list_t nodes = tree.find_nodes(parent, symbol_argument);
+
+    wcstring param;
+    for (node_offset_t i=0; i < nodes.size(); i++)
+    {
+        const parse_node_t *child = nodes.at(i);
+        assert(child != NULL && child->type == symbol_argument);
+        param.assign(src, child->source_start, child->source_length);
+        color_argument(param, color_array.begin() + child->source_start, HIGHLIGHT_NORMAL);
+    }
+}
+
+static void color_children(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, int color, std::vector<int> &color_array)
+{
+    for (node_offset_t idx=0; idx < parent.child_count; idx++)
+    {
+        const parse_node_t *child = tree.get_child(parent, idx);
+        if (child != NULL && child->type == type && child->has_source())
+        {
+            color_node(*child, color, color_array);
+        }
+    }
+}
+
+void highlight_shell_magic(const wcstring &buff, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
+{
+    ASSERT_IS_BACKGROUND_THREAD();
+
+    const size_t length = buff.size();
+    assert(buff.size() == color.size());
+
+    if (length == 0)
+        return;
+
+    std::fill(color.begin(), color.end(), -1);
+
+    /* Do something sucky and get the current working directory on this background thread. This should really be passed in. */
+    const wcstring working_directory = env_get_pwd_slash();
+
+    /* Parse the buffer */
+    parse_node_tree_t parse_tree;
+    parse_t parser;
+    parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &parse_tree, NULL);
+
+    /* Walk the node tree */
+    for (parse_node_tree_t::const_iterator iter = parse_tree.begin(); iter != parse_tree.end(); ++iter)
+    {
+        const parse_node_t &node = *iter;
+
+        switch (node.type)
+        {
+            // Color direct string descendants, e.g. 'for' and 'in'.
+            case symbol_for_header:
+            case symbol_while_header:
+            case symbol_begin_header:
+            case symbol_function_header:
+            case symbol_if_clause:
+            case symbol_else_clause:
+            case symbol_case_item:
+            case symbol_switch_statement:
+            case symbol_boolean_statement:
+            case symbol_decorated_statement:
+                color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
+                break;
+
+            case symbol_redirection:
+                color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_REDIRECTION, color);
+                break;
+
+            case parse_token_type_background:
+            case parse_token_type_end:
+                color_node(node, HIGHLIGHT_END, color);
+                break;
+
+            case symbol_plain_statement:
+            {
+                // Color the command
+                color_children(parse_tree, node, parse_token_type_string, HIGHLIGHT_COMMAND, color);
+
+                // Color arguments
+                const parse_node_t *arguments = parse_tree.get_child(node, 1, symbol_arguments_or_redirections_list);
+                if (arguments != NULL)
+                {
+                    color_arguments(buff, parse_tree, *arguments, color);
+                }
+            }
+            break;
+
+
+            case symbol_arguments_or_redirections_list:
+            case symbol_argument_list:
+                /* Nothing, these are handled by their parents */
+                break;
+
+            case parse_special_type_parse_error:
+            case parse_special_type_tokenizer_error:
+                color_node(node, HIGHLIGHT_ERROR, color);
+                break;
+
+            case parse_special_type_comment:
+                color_node(node, HIGHLIGHT_COMMENT, color);
+                break;
+
+            default:
+                break;
+        }
+    }
+}
 
 /**
    Perform quote and parenthesis highlighting on the specified string.
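As the code above shows, color_argument() writes one highlight value per character: it first fills the whole range with normal_status and then overrides escapes, operators and quotes. A small standalone illustration (the input string is arbitrary; color_argument and HIGHLIGHT_NORMAL come from this diff, and the function is file-local to highlight.cpp):

    // Illustration only: color a lone argument containing an escaped space.
    const wcstring arg = L"file\\ name.txt";
    std::vector<int> colors(arg.size(), -1);
    color_argument(arg, colors.begin(), HIGHLIGHT_NORMAL);
    // colors[4] and colors[5] (the "\ " escape) now hold HIGHLIGHT_ESCAPE;
    // every other entry holds HIGHLIGHT_NORMAL.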
@@ -84,6 +84,7 @@ struct file_detection_context_t;
    \param error a list in which a description of each error will be inserted. May be 0, in which case no error descriptions will be generated.
 */
 void highlight_shell(const wcstring &buffstr, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars);
+void highlight_shell_magic(const wcstring &buffstr, std::vector<int> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars);
 
 /**
    Perform syntax highlighting for the text in buff. Matching quotes and parentheses are highlighted. The result is
@@ -135,14 +135,12 @@ RESOLVE(statement)
             return 2;
 
         case parse_keyword_else:
             //symbol_stack_pop();
             return NO_PRODUCTION;
 
         case parse_keyword_switch:
             return 3;
 
         case parse_keyword_end:
             PARSER_DIE(); //todo
             return NO_PRODUCTION;
 
         // 'in' is only special within a for_header
@@ -378,7 +376,7 @@ RESOLVE(arguments_or_redirections_list)
 
 PRODUCTIONS(argument_or_redirection) =
 {
-    {parse_token_type_string},
+    {symbol_argument},
     {parse_token_type_redirection}
 };
 RESOLVE(argument_or_redirection)
@@ -394,6 +392,18 @@ RESOLVE(argument_or_redirection)
     }
 }
 
+PRODUCTIONS(argument) =
+{
+    {parse_token_type_string}
+};
+RESOLVE_ONLY(argument)
+
+PRODUCTIONS(redirection) =
+{
+    {parse_token_type_redirection}
+};
+RESOLVE_ONLY(redirection)
+
 PRODUCTIONS(optional_background) =
 {
     {},
@@ -449,6 +459,8 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n
         TEST(plain_statement)
         TEST(arguments_or_redirections_list)
         TEST(argument_or_redirection)
+        TEST(argument)
+        TEST(redirection)
         TEST(optional_background)
 
         case parse_token_type_string:
@@ -461,6 +473,14 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n
             PARSER_DIE();
             break;
 
+        case parse_special_type_parse_error:
+        case parse_special_type_tokenizer_error:
+        case parse_special_type_comment:
+            fprintf(stderr, "Special type %ls passed to %s\n", token_type_description(node_type).c_str(), __FUNCTION__);
+            PARSER_DIE();
+            break;
+
+
        case token_type_invalid:
            fprintf(stderr, "token_type_invalid passed to %s\n", __FUNCTION__);
            PARSER_DIE();
parse_tree.cpp (200 changed lines)

@@ -101,6 +101,11 @@ wcstring token_type_description(parse_token_type_t type)
             return L"arguments_or_redirections_list";
         case symbol_argument_or_redirection:
             return L"argument_or_redirection";
+        case symbol_argument:
+            return L"symbol_argument";
+        case symbol_redirection:
+            return L"symbol_redirection";
+
 
         case parse_token_type_string:
             return L"token_string";
@@ -116,6 +121,14 @@ wcstring token_type_description(parse_token_type_t type)
             return L"token_terminate";
         case symbol_optional_background:
             return L"optional_background";
 
+        case parse_special_type_parse_error:
+            return L"parse_error";
+        case parse_special_type_tokenizer_error:
+            return L"tokenizer_error";
+        case parse_special_type_comment:
+            return L"comment";
+
     }
     return format_string(L"Unknown token type %ld", static_cast<long>(type));
 }
@@ -217,6 +230,14 @@ static parse_token_t parse_token_from_tokenizer_token(enum token_type tokenizer_
             result.type = parse_token_type_redirection;
             break;
 
+        case TOK_ERROR:
+            result.type = parse_special_type_tokenizer_error;
+            break;
+
+        case TOK_COMMENT:
+            result.type = parse_special_type_comment;
+            break;
+
 
         default:
             fprintf(stderr, "Bad token type %d passed to %s\n", (int)tokenizer_token_type, __FUNCTION__);
@@ -248,9 +269,16 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
     }
     if (node.type == parse_token_type_string)
     {
-        result->append(L": \"");
-        result->append(src, node.source_start, node.source_length);
-        result->append(L"\"");
+        if (node.source_start == -1)
+        {
+            append_format(*result, L" (no source)");
+        }
+        else
+        {
+            result->append(L": \"");
+            result->append(src, node.source_start, node.source_length);
+            result->append(L"\"");
+        }
     }
     result->push_back(L'\n');
     ++*line;
@@ -311,21 +339,25 @@ class parse_ll_t
     // Constructor
     parse_ll_t() : fatal_errored(false)
     {
-        // initial node
-        symbol_stack.push_back(parse_stack_element_t(symbol_job_list, 0)); // goal token
-        nodes.push_back(parse_node_t(symbol_job_list));
+        this->reset();
     }
 
     bool top_node_match_token(parse_token_t token);
 
     void accept_token(parse_token_t token, const wcstring &src);
 
+    // Clear the parse symbol stack (but not the node tree). Add a new job_list_t goal node.
+    void reset(void);
+
     void parse_error(const wchar_t *expected, parse_token_t token);
     void parse_error(parse_token_t token, const wchar_t *format, ...);
     void append_error_callout(wcstring &error_message, parse_token_t token);
 
     void dump_stack(void) const;
 
+    // Figure out the ranges of intermediate nodes
+    void determine_node_ranges();
+
     // Get the node corresponding to the top element of the stack
     parse_node_t &node_for_top_symbol()
     {
@@ -453,9 +485,41 @@ void parse_ll_t::dump_stack(void) const
     }
 }
 
+// Give each node a source range equal to the union of the ranges of its children
+// Terminal nodes already have source ranges (and no children)
+// Since children always appear after their parents, we can implement this very simply by walking backwards
+void parse_ll_t::determine_node_ranges(void)
+{
+    const size_t source_start_invalid = -1;
+    size_t idx = nodes.size();
+    while (idx--)
+    {
+        parse_node_t *parent = &nodes.at(idx);
+
+        // Skip nodes that already have a source range. These are terminal nodes.
+        if (parent->source_start != source_start_invalid)
+            continue;
+
+        // Ok, this node needs a source range. Get all of its children, and then set its range.
+        size_t min_start = source_start_invalid, max_end = 0; //note source_start_invalid is huge
+        for (node_offset_t i=0; i < parent->child_count; i++)
+        {
+            const parse_node_t &child = nodes.at(parent->child_offset(i));
+            min_start = std::min(min_start, child.source_start);
+            max_end = std::max(max_end, child.source_start + child.source_length);
+        }
+
+        if (min_start != source_start_invalid) {
+            assert(max_end >= min_start);
+            parent->source_start = min_start;
+            parent->source_length = max_end - min_start;
+        }
+    }
+}
+
 void parse_ll_t::parse_error(parse_token_t token, const wchar_t *fmt, ...)
 {
-    this->dump_stack();
+    //this->dump_stack();
     parse_error_t err;
 
     va_list va;
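The comment above states the rule: a parent's source range is the union of its children's ranges, computed by walking the node list back to front. A standalone illustration of just the union step (not code from this commit; the struct name is made up for the example):

    #include <algorithm> // std::min, std::max
    #include <cstddef>   // size_t

    struct source_range_t { size_t start, length; }; // illustrative type only

    static source_range_t union_ranges(const source_range_t &a, const source_range_t &b)
    {
        size_t start = std::min(a.start, b.start);
        size_t end = std::max(a.start + a.length, b.start + b.length);
        source_range_t result = { start, end - start };
        return result;
    }
    // union_ranges({5, 4}, {12, 2}) yields {5, 9}, i.e. the span [5, 14).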
@@ -481,8 +545,27 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
     fatal_errored = true;
 }
 
+void parse_ll_t::reset(void)
+{
+    // add a new job_list node and then reset our symbol list to point at it
+    node_offset_t where = nodes.size();
+    nodes.push_back(parse_node_t(symbol_job_list));
+
+    symbol_stack.clear();
+    symbol_stack.push_back(parse_stack_element_t(symbol_job_list, where)); // goal token
+    this->fatal_errored = false;
+}
+
+
 bool parse_ll_t::top_node_match_token(parse_token_t token)
 {
+    if (symbol_stack.empty())
+    {
+        // This can come about with an unbalanced 'end' or 'else', which causes us to terminate the outermost job list.
+        this->fatal_errored = true;
+        return false;
+    }
+
     PARSE_ASSERT(! symbol_stack.empty());
     PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
     bool result = false;
@@ -520,10 +603,23 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
         fprintf(stderr, "Accept token %ls\n", token.describe().c_str());
     }
     PARSE_ASSERT(token.type >= FIRST_PARSE_TOKEN_TYPE);
-    PARSE_ASSERT(! symbol_stack.empty());
 
+    bool consumed = false;
+
+    // Handle special types specially. Note that these are the only types that can be pushed if the symbol stack is empty.
+    if (token.type == parse_special_type_parse_error || token.type == parse_special_type_tokenizer_error || token.type == parse_special_type_comment)
+    {
+        parse_node_t err_node(token.type);
+        err_node.source_start = token.source_start;
+        err_node.source_length = token.source_length;
+        nodes.push_back(err_node);
+        consumed = true;
+    }
+
+    while (! consumed && ! this->fatal_errored)
     {
+        PARSE_ASSERT(! symbol_stack.empty());
 
         if (top_node_match_token(token))
         {
             if (logit)
@@ -534,6 +630,10 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
             break;
         }
 
+        // top_node_match_token may indicate an error if our stack is empty
+        if (this->fatal_errored)
+            break;
+
         // Get the production for the top of the stack
         parse_stack_element_t &stack_elem = symbol_stack.back();
         parse_node_t &node = nodes.at(stack_elem.node_idx);
@@ -548,6 +648,12 @@ void parse_ll_t::accept_token(parse_token_t token, const wcstring &src)
             // Manipulate the symbol stack.
             // Note that stack_elem is invalidated by popping the stack.
             symbol_stack_pop_push_production(production);
+
+            // If we end up with an empty stack, something bad happened, like an unbalanced end
+            if (symbol_stack.empty())
+            {
+                this->parse_error(token, L"All symbols removed from symbol stack. Likely unbalanced else or end?", stack_elem.describe().c_str(), token.describe().c_str());
+            }
         }
     }
 }
@@ -556,6 +662,11 @@ parse_t::parse_t() : parser(new parse_ll_t())
 {
 }
 
+parse_t::~parse_t()
+{
+    delete parser;
+}
+
 static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
 {
     parse_keyword_t result = parse_keyword_none;
|
|||
return result;
|
||||
}
|
||||
|
||||
bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors)
|
||||
bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it)
|
||||
{
|
||||
tokenizer_t tok = tokenizer_t(str.c_str(), 0);
|
||||
tok_flags_t tok_options = TOK_SQUASH_ERRORS;
|
||||
if (parse_flags & parse_flag_include_comments)
|
||||
tok_options |= TOK_SHOW_COMMENTS;
|
||||
|
||||
tokenizer_t tok = tokenizer_t(str.c_str(), tok_options);
|
||||
for (; tok_has_next(&tok) && ! this->parser->fatal_errored; tok_next(&tok))
|
||||
{
|
||||
token_type tok_type = static_cast<token_type>(tok_last_type(&tok));
|
||||
const wchar_t *tok_txt = tok_last(&tok);
|
||||
int tok_start = tok_get_pos(&tok);
|
||||
size_t tok_extent = tok_get_extent(&tok);
|
||||
|
||||
if (tok_type == TOK_ERROR)
|
||||
{
|
||||
fprintf(stderr, "Tokenizer error\n");
|
||||
break;
|
||||
}
|
||||
assert(tok_extent < 10000000); //paranoia
|
||||
|
||||
parse_token_t token = parse_token_from_tokenizer_token(tok_type);
|
||||
token.tokenizer_type = tok_type;
|
||||
|
@@ -621,12 +731,31 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_
         this->parser->accept_token(token, str);
 
         if (this->parser->fatal_errored)
-            break;
+        {
+            if (parse_flags & parse_flag_continue_after_error)
+            {
+                /* Mark an error and then keep going */
+                token.type = parse_special_type_parse_error;
+                token.keyword = parse_keyword_none;
+                this->parser->accept_token(token, str);
+                this->parser->reset();
+            }
+            else
+            {
+                /* Bail out */
+                break;
+            }
+        }
     }
 
+    // Teach each node where its source range is
+    this->parser->determine_node_ranges();
+
 #if 0
     wcstring result = dump_tree(this->parser->nodes, str);
     fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str());
+    fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));
 #endif
 
     if (output != NULL)
     {
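This is the error path that makes the tolerant mode work: when a token cannot be parsed and parse_flag_continue_after_error is set, the token is re-tagged as parse_special_type_parse_error, pushed as a node, and the parser is reset to a fresh job_list goal instead of bailing out. A usage sketch, mirroring the call added in highlight.cpp ('buff' stands for whatever the user has typed so far):

    // Sketch: tolerant parse for syntax highlighting; errors become
    // parse_special_type_parse_error nodes rather than stopping the parse.
    parse_node_tree_t tree;
    parse_t parser;
    parser.parse(buff, parse_flag_continue_after_error | parse_flag_include_comments, &tree, NULL);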
@@ -642,3 +771,40 @@ bool parse_t::parse(const wcstring &str, parse_node_tree_t *output, parse_error_
 
     return ! this->parser->fatal_errored;
 }
+
+const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type) const
+{
+    const parse_node_t *result = NULL;
+    PARSE_ASSERT(which < parent.child_count);
+    node_offset_t child_offset = parent.child_offset(which);
+    if (child_offset < this->size())
+    {
+        result = &this->at(child_offset);
+    }
+
+    // If we are given an expected type, then the node must be null or that type
+    if (result != NULL)
+    {
+        assert(expected_type == token_type_invalid || expected_type == result->type);
+    }
+
+    return result;
+}
+
+static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node_t &parent, parse_token_type_t type, parse_node_tree_t::parse_node_list_t *result)
+{
+    if (parent.type == type) result->push_back(&parent);
+    for (size_t i=0; i < parent.child_count; i++)
+    {
+        const parse_node_t *child = tree.get_child(parent, i);
+        assert(child != NULL);
+        find_nodes_recursive(tree, *child, type, result);
+    }
+}
+
+parse_node_tree_t::parse_node_list_t parse_node_tree_t::find_nodes(const parse_node_t &parent, parse_token_type_t type) const
+{
+    parse_node_list_t result;
+    find_nodes_recursive(*this, parent, type, &result);
+    return result;
+}
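A usage sketch of the two new tree queries (here 'tree', 'statement_node' and 'src' are assumed to come from an earlier parse, as in highlight.cpp's color_arguments()):

    // Sketch: collect the source text of every argument under a statement node.
    const parse_node_tree_t::parse_node_list_t args = tree.find_nodes(statement_node, symbol_argument);
    for (size_t i = 0; i < args.size(); i++)
    {
        const parse_node_t *arg = args.at(i);
        if (arg != NULL && arg->has_source())
        {
            const wcstring text(src, arg->source_start, arg->source_length);
            // ... use 'text', e.g. for coloring ...
        }
    }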
parse_tree.h (43 changed lines)

@@ -15,7 +15,7 @@
 #include <vector>
 
 #define PARSE_ASSERT(a) assert(a)
-#define PARSER_DIE() exit_without_destructors(-1)
+#define PARSER_DIE() do { fprintf(stderr, "Parser dying!\n"); exit_without_destructors(-1); } while (0)
 
 class parse_node_t;
 class parse_node_tree_t;
@@ -36,6 +36,18 @@ struct parse_error_t
 };
 typedef std::vector<parse_error_t> parse_error_list_t;
 
+enum
+{
+    parse_flag_none = 0,
+
+    /* Attempt to build a "parse tree" no matter what. This may result in a 'forest' of disconnected trees. This is intended to be used by syntax highlighting. */
+    parse_flag_continue_after_error = 1 << 0,
+
+    /* Include comment tokens */
+    parse_flag_include_comments = 1 << 1
+};
+typedef unsigned int parse_tree_flags_t;
+
 class parse_ll_t;
 class parse_t
 {
@@ -43,7 +55,8 @@ class parse_t
 
     public:
     parse_t();
-    bool parse(const wcstring &str, parse_node_tree_t *output, parse_error_list_t *errors);
+    ~parse_t();
+    bool parse(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, bool log_it = false);
 };
 
 enum parse_token_type_t
@@ -80,6 +93,9 @@ enum parse_token_type_t
     symbol_argument_list_nonempty,
     symbol_argument_list,
 
+    symbol_argument,
+    symbol_redirection,
+
     symbol_optional_background,
 
     // Terminal types
@@ -90,6 +106,11 @@ enum parse_token_type_t
     parse_token_type_end,
     parse_token_type_terminate,
 
+    // Very special terminal types that don't appear in the production list
+    parse_special_type_parse_error,
+    parse_special_type_tokenizer_error,
+    parse_special_type_comment,
+
     LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
     FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
 };
@@ -145,7 +166,7 @@ public:
     wcstring describe(void) const;
 
     /* Constructor */
-    explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(0), source_length(0), child_start(0), child_count(0), tag(0)
+    explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), child_start(0), child_count(0), tag(0)
     {
     }
 
@@ -154,10 +175,23 @@ public:
         PARSE_ASSERT(which < child_count);
         return child_start + which;
     }
 
+    bool has_source() const
+    {
+        return source_start != (size_t)(-1);
+    }
 };
 
 class parse_node_tree_t : public std::vector<parse_node_t>
 {
     public:
 
+    /* Get the node corresponding to a child of the given node, or NULL if there is no such child. If expected_type is provided, assert that the node has that type. */
+    const parse_node_t *get_child(const parse_node_t &parent, node_offset_t which, parse_token_type_t expected_type = token_type_invalid) const;
+
+    /* Find all the nodes of a given type underneath a given node */
+    typedef std::vector<const parse_node_t *> parse_node_list_t;
+    parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const;
 };
 
 
@@ -214,7 +248,8 @@ class parse_node_tree_t : public std::vector<parse_node_t>
 
     arguments_or_redirections_list = <empty> |
                                      argument_or_redirection arguments_or_redirections_list
-    argument_or_redirection = redirection | <TOK_STRING>
+    argument_or_redirection = argument | redirection
+    argument = <TOK_STRING>
     redirection = <TOK_REDIRECTION>
 
     terminator = <TOK_END> | <TOK_BACKGROUND>
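For illustration, a simple command with one argument and one redirection decomposes under these productions roughly as follows (the shape of plain_statement is inferred from the get_child(node, 1, symbol_arguments_or_redirections_list) call in highlight.cpp; only the productions shown above are authoritative):

    plain_statement
        <TOK_STRING>                      (the command word)
        arguments_or_redirections_list
            argument_or_redirection -> argument = <TOK_STRING>
            arguments_or_redirections_list
                argument_or_redirection -> redirection = <TOK_REDIRECTION>
                arguments_or_redirections_list = <empty>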