Adoption of new parser in abbreviations

ridiculousfish 2013-10-09 02:03:50 -07:00
parent a51bd03a5c
commit 7b86b2e05a
7 changed files with 115 additions and 162 deletions

View file

@@ -61,7 +61,6 @@
#include "signal.h"
#include "highlight.h"
#include "parse_tree.h"
#include "parse_exec.h"
#include "parse_util.h"
/**
@@ -769,6 +768,11 @@ static void test_abbreviations(void)
expanded = reader_expand_abbreviation_in_command(L"of gc", wcslen(L"of gc"), &result);
if (expanded) err(L"gc incorrectly expanded on line %ld", (long)__LINE__);
/* others should not be */
expanded = reader_expand_abbreviation_in_command(L"command gc", wcslen(L"command gc"), &result);
if (expanded) err(L"gc incorrectly expanded on line %ld", (long)__LINE__);
env_pop();
}
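For contrast with the negative cases above, a positive case would look roughly like the following. This is only a sketch, not part of the commit: it assumes an abbreviation for 'gc' was registered earlier in test_abbreviations() and that result receives the expanded command line.

/* Hypothetical positive case: a bare 'gc' in command position should expand */
expanded = reader_expand_abbreviation_in_command(L"gc", wcslen(L"gc"), &result);
if (! expanded) err(L"gc failed to expand on line %ld", (long)__LINE__);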
@@ -1916,12 +1920,16 @@ static void test_new_parser_fuzzing(void)
size_t max = 5;
for (size_t len=1; len <= max; len++)
{
fprintf(stderr, "%lu / %lu\n", len, max);
fprintf(stderr, "%lu / %lu...", len, max);
std::vector<parser_fuzz_token_t> tokens(len);
size_t count = 0;
parse_t parser;
parse_node_tree_t parse_tree;
do
{
parse_t parser;
parse_node_tree_t parse_tree;
parser.clear();
parse_tree.clear();
count++;
for (size_t i=0; i < len; i++)
{
const parser_fuzz_token_t &token = tokens[i];
@@ -1931,6 +1939,7 @@ static void test_new_parser_fuzzing(void)
// keep going until we wrap
}
while (! increment(tokens));
fprintf(stderr, "done (%lu)\n", count);
}
double end = timef();
say(L"All fuzzed in %f seconds!", end - start);
@@ -2108,7 +2117,7 @@ int main(int argc, char **argv)
say(L"Testing low-level functionality");
set_main_thread();
setup_fork_guards();
//proc_init();
//proc_init(); //disabling this prevents catching SIGINT
event_init();
function_init();
builtin_init();
@@ -2116,7 +2125,6 @@ int main(int argc, char **argv)
env_init();
test_highlighting();
return 0;
test_new_parser_fuzzing();
test_new_parser_correctness();
test_highlighting();

View file

@@ -332,7 +332,7 @@ static bool is_potential_cd_path(const wcstring &path, const wcstring &working_d
}
/* Given a plain statement node in a parse tree, get the command and return it, expanded appropriately for commands. If we succeed, return true. */
static bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd)
bool plain_statement_get_expanded_command(const wcstring &src, const parse_node_tree_t &tree, const parse_node_t &plain_statement, wcstring *out_cmd)
{
assert(plain_statement.type == symbol_plain_statement);
bool result = false;
@@ -708,15 +708,15 @@ static bool has_expand_reserved(const wcstring &str)
return result;
}
/* Parse a command line. Return by reference the last command, its arguments, and the offset in the string of the beginning of the last argument. This is used by autosuggestions */
static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, const parse_node_t **out_last_arg)
/* Parse a command line. Return by reference the last command, and the last argument to that command (as a copied node), if any. This is used by autosuggestions */
static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expanded_command, parse_node_t *out_last_arg)
{
bool result = false;
/* Parse the buffer */
parse_node_tree_t parse_tree;
parse_t parser;
parser.parse(buff, parse_flag_continue_after_error, &parse_tree, NULL);
parser.parse(buff, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
/* Find the last statement */
const parse_node_t *last_statement = parse_tree.find_last_node_of_type(symbol_plain_statement, NULL);
@@ -727,8 +727,12 @@ static bool autosuggest_parse_command(const wcstring &buff, wcstring *out_expand
/* We got it */
result = true;
/* Find the last argument */
*out_last_arg = parse_tree.find_last_node_of_type(symbol_plain_statement, last_statement);
/* Find the last argument. If we don't get one, return an invalid node. */
const parse_node_t *last_arg = parse_tree.find_last_node_of_type(symbol_argument, last_statement);
if (last_arg != NULL)
{
*out_last_arg = *last_arg;
}
}
}
return result;
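The switch from a pointer out-parameter to a copied parse_node_t matters here because parse_tree is a local: the old code handed back a pointer into it, which would dangle once autosuggest_parse_command returns. A sketch of the caller-side pattern the new signature implies (mirroring the call sites below; buff and last_arg_text are illustrative names):

/* The out node starts invalid, so its type doubles as a "was an argument found" flag */
wcstring parsed_command;
parse_node_t last_arg_node(token_type_invalid);
if (autosuggest_parse_command(buff, &parsed_command, &last_arg_node))
{
    if (last_arg_node.type == symbol_argument && last_arg_node.has_source())
    {
        const wcstring last_arg_text = last_arg_node.get_source(buff);
        /* ... operate on the text of the last argument ... */
    }
}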
@@ -739,20 +743,20 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di
{
if (str.empty())
return false;
ASSERT_IS_BACKGROUND_THREAD();
/* Parse the string */
wcstring parsed_command;
const parse_node_t *last_arg_node = NULL;
parse_node_t last_arg_node(token_type_invalid);
if (! autosuggest_parse_command(str, &parsed_command, &last_arg_node))
return false;
bool result = false;
if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source())
if (parsed_command == L"cd" && last_arg_node.type == symbol_argument && last_arg_node.has_source())
{
/* We can possibly handle this specially */
const wcstring escaped_dir = last_arg_node->get_source(str);
const wcstring escaped_dir = last_arg_node.get_source(str);
wcstring suggested_path;
/* We always return true because we recognized the command. This prevents us from falling back to dumber algorithms; for example we won't suggest a non-directory for the cd command. */
@@ -771,13 +775,12 @@ bool autosuggest_suggest_special(const wcstring &str, const wcstring &working_di
path_flags_t path_flags = (quote == L'\0') ? PATH_EXPAND_TILDE : 0;
if (unescaped && is_potential_cd_path(unescaped_dir, working_directory, path_flags, &suggested_path))
{
/* Note: this looks really wrong for strings that have an "unescapable" character in them, e.g. a \t, because parse_util_escape_string_with_quote will insert that character */
wcstring escaped_suggested_path = parse_util_escape_string_with_quote(suggested_path, quote);
/* Return it */
out_suggestion = str;
out_suggestion.erase(last_arg_node->source_start);
out_suggestion.erase(last_arg_node.source_start);
if (quote != L'\0') out_suggestion.push_back(quote);
out_suggestion.append(escaped_suggested_path);
if (quote != L'\0') out_suggestion.push_back(quote);
@@ -798,14 +801,14 @@ bool autosuggest_validate_from_history(const history_item_t &item, file_detectio
/* Parse the string */
wcstring parsed_command;
const parse_node_t *last_arg_node = NULL;
parse_node_t last_arg_node(token_type_invalid);
if (! autosuggest_parse_command(item.str(), &parsed_command, &last_arg_node))
return false;
if (parsed_command == L"cd" && last_arg_node != NULL && last_arg_node->has_source())
if (parsed_command == L"cd" && last_arg_node.type == symbol_argument && last_arg_node.has_source())
{
/* We can possibly handle this specially */
wcstring dir = last_arg_node->get_source(item.str());
wcstring dir = last_arg_node.get_source(item.str());
if (expand_one(dir, EXPAND_SKIP_CMDSUBST))
{
handled = true;
@@ -1968,12 +1971,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight()
case symbol_plain_statement:
{
// Get the decoration from the parent
enum parse_statement_decoration_t decoration = parse_statement_decoration_none;
const parse_node_t *decorated_statement = parse_tree.get_parent(node, symbol_decorated_statement);
if (decorated_statement != NULL)
{
decoration = static_cast<enum parse_statement_decoration_t>(decorated_statement->production_idx);
}
enum parse_statement_decoration_t decoration = parse_tree.decoration_for_plain_statement(node);
/* Color the command */
const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string);

View file

@@ -27,8 +27,8 @@ static bool production_is_valid(const production_options_t production_list, prod
}
#define PRODUCTIONS(sym) static const production_options_t productions_##sym
#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag)
#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) { return 0; }
#define RESOLVE(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword)
#define RESOLVE_ONLY(sym) static production_option_idx_t resolve_##sym (parse_token_type_t token_type, parse_keyword_t token_keyword) { return 0; }
#define KEYWORD(x) ((x) + LAST_TOKEN_OR_SYMBOL + 1)
@@ -418,7 +418,7 @@ RESOLVE(optional_background)
}
#define TEST(sym) case (symbol_##sym): production_list = & productions_ ## sym ; resolver = resolve_ ## sym ; break;
const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, production_tag_t *out_tag, wcstring *out_error_text)
const production_t *parse_productions::production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_which_production, wcstring *out_error_text)
{
bool log_it = false;
if (log_it)
@@ -428,7 +428,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n
/* Fetch the list of productions and the function to resolve them */
const production_options_t *production_list = NULL;
production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword, production_tag_t *tag) = NULL;
production_option_idx_t (*resolver)(parse_token_type_t token_type, parse_keyword_t token_keyword) = NULL;
switch (node_type)
{
TEST(job_list)
@@ -486,7 +486,7 @@ const production_t *parse_productions::production_for_token(parse_token_type_t n
PARSE_ASSERT(resolver != NULL);
const production_t *result = NULL;
production_option_idx_t which = resolver(input_type, input_keyword, out_tag);
production_option_idx_t which = resolver(input_type, input_keyword);
if (log_it)
{

View file

@@ -63,7 +63,7 @@ inline bool production_element_is_valid(production_element_t elem)
}
/* Fetch a production */
const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, production_tag_t *out_tag, wcstring *out_error_text);
const production_t *production_for_token(parse_token_type_t node_type, parse_token_type_t input_type, parse_keyword_t input_keyword, production_option_idx_t *out_idx, wcstring *out_error_text);
}

View file

@@ -720,7 +720,7 @@ void parse_ll_t::accept_token(parse_token_t token)
// Get the production for the top of the stack
parse_stack_element_t &stack_elem = symbol_stack.back();
parse_node_t &node = nodes.at(stack_elem.node_idx);
const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, &node.tag, NULL /* error text */);
const production_t *production = production_for_token(stack_elem.type, token.type, token.keyword, &node.production_idx, NULL /* error text */);
if (production == NULL)
{
if (should_generate_error_messages)
@@ -804,6 +804,9 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
if (parse_flags & parse_flag_include_comments)
tok_options |= TOK_SHOW_COMMENTS;
if (parse_flags & parse_flag_accept_incomplete_tokens)
tok_options |= TOK_ACCEPT_UNFINISHED;
this->parser->set_should_generate_error_messages(errors != NULL);
tokenizer_t tok = tokenizer_t(str.c_str(), tok_options);
@@ -845,14 +848,14 @@ bool parse_t::parse(const wcstring &str, parse_tree_flags_t parse_flags, parse_n
// Tag nodes
#if 0
wcstring result = dump_tree(this->parser->nodes, str);
fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str());
fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", this->parser->nodes.size(), sizeof(parse_node_t), this->parser->nodes.size() * sizeof(parse_node_t));
#endif
// Acquire the output from the parser
this->parser->acquire_output(output, errors);
#if 0
//wcstring result = dump_tree(this->parser->nodes, str);
//fprintf(stderr, "Tree (%ld nodes):\n%ls", this->parser->nodes.size(), result.c_str());
fprintf(stderr, "%lu nodes, node size %lu, %lu bytes\n", output->size(), sizeof(parse_node_t), output->size() * sizeof(parse_node_t));
#endif
// Indicate if we had a fatal error
return ! this->parser->has_fatal_error();
@@ -992,3 +995,15 @@ bool parse_node_tree_t::argument_list_is_root(const parse_node_t &node) const
}
return result;
}
enum parse_statement_decoration_t parse_node_tree_t::decoration_for_plain_statement(const parse_node_t &node) const
{
assert(node.type == symbol_plain_statement);
enum parse_statement_decoration_t decoration = parse_statement_decoration_none;
const parse_node_t *decorated_statement = this->get_parent(node, symbol_decorated_statement);
if (decorated_statement != NULL)
{
decoration = static_cast<enum parse_statement_decoration_t>(decorated_statement->production_idx);
}
return decoration;
}
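A minimal sketch of how the new helper is intended to be used (the command string and flag choice are illustrative; the calls themselves are the ones introduced or used elsewhere in this commit):

parse_node_tree_t tree;
parse_t parser;
parser.parse(L"builtin echo hi", parse_flag_continue_after_error, &tree, NULL);

const parse_node_t *stmt = tree.find_last_node_of_type(symbol_plain_statement, NULL);
if (stmt != NULL && tree.decoration_for_plain_statement(*stmt) == parse_statement_decoration_builtin)
{
    /* The statement was explicitly prefixed with 'builtin' */
}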

View file

@@ -125,7 +125,10 @@ enum
parse_flag_continue_after_error = 1 << 0,
/* Include comment tokens */
parse_flag_include_comments = 1 << 1
parse_flag_include_comments = 1 << 1,
/* Indicate that the tokenizer should accept incomplete tokens */
parse_flag_accept_incomplete_tokens = 1 << 2
};
typedef unsigned int parse_tree_flags_t;
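To illustrate why the new flag exists (a sketch; the buffer text is made up): callers that operate on command lines still being typed, such as autosuggestions and abbreviation expansion, may see an unfinished final token like an unclosed quote. The flag maps to TOK_ACCEPT_UNFINISHED in parse_t::parse, so the tokenizer returns such tokens as ordinary strings instead of errors.

/* Parse an in-progress command line whose last token is an unterminated quote */
parse_node_tree_t tree;
parse_t parser;
parser.parse(L"echo 'partially typed",
             parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens,
             &tree, NULL);
/* The unfinished token should still appear as an argument node in the tree */
const parse_node_t *last_arg = tree.find_last_node_of_type(symbol_argument, NULL);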
@@ -175,9 +178,6 @@ public:
node_offset_t child_start;
node_offset_t child_count;
/* Type-dependent data */
uint32_t tag;
/* Which production was used */
uint8_t production_idx;
@@ -185,7 +185,7 @@ public:
wcstring describe(void) const;
/* Constructor */
explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), tag(0)
explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0)
{
}
@@ -211,6 +211,15 @@ public:
}
};
/* Statement decorations. This matches the order of productions in decorated_statement */
enum parse_statement_decoration_t
{
parse_statement_decoration_none,
parse_statement_decoration_command,
parse_statement_decoration_builtin
};
/* The parse tree itself */
class parse_node_tree_t : public std::vector<parse_node_t>
{
@@ -232,27 +241,10 @@ public:
/* Indicate if the given argument_list or arguments_or_redirections_list is a root list, or has a parent */
bool argument_list_is_root(const parse_node_t &node) const;
};
/* Node type specific data, stored in the tag field */
/* Statement decorations, stored in the tag of plain_statement. This matches the order of productions in decorated_statement */
enum parse_statement_decoration_t
{
parse_statement_decoration_none,
parse_statement_decoration_command,
parse_statement_decoration_builtin
};
/* Argument flags as a bitmask, stored in the tag of argument */
enum parse_argument_flags_t
{
/* Indicates that this or a prior argument was --, so this should not be treated as an option */
parse_argument_no_options = 1 << 0,
/* Indicates that the argument is for a cd command */
parse_argument_is_for_cd = 1 << 1
/* Utilities */
enum parse_statement_decoration_t decoration_for_plain_statement(const parse_node_t &node) const;
};
/* Fish grammar:

View file

@@ -99,6 +99,7 @@ commence.
#include "path.h"
#include "parse_util.h"
#include "parser_keywords.h"
#include "parse_tree.h"
/**
Maximum length of prefix string when printing completion
@@ -659,117 +660,56 @@ bool reader_expand_abbreviation_in_command(const wcstring &cmdline, size_t curso
const size_t subcmd_offset = cmdsub_begin - buff;
const wcstring subcmd = wcstring(cmdsub_begin, cmdsub_end - cmdsub_begin);
const wchar_t *subcmd_cstr = subcmd.c_str();
/* Get the token containing the cursor */
const wchar_t *subcmd_tok_begin = NULL, *subcmd_tok_end = NULL;
assert(cursor_pos >= subcmd_offset);
size_t subcmd_cursor_pos = cursor_pos - subcmd_offset;
parse_util_token_extent(subcmd_cstr, subcmd_cursor_pos, &subcmd_tok_begin, &subcmd_tok_end, NULL, NULL);
/* Compute the offset of the token before the cursor within the subcmd */
assert(subcmd_tok_begin >= subcmd_cstr);
assert(subcmd_tok_end >= subcmd_tok_begin);
const size_t subcmd_tok_begin_offset = subcmd_tok_begin - subcmd_cstr;
const size_t subcmd_tok_length = subcmd_tok_end - subcmd_tok_begin;
/* Now parse the subcmd, looking for commands */
bool had_cmd = false, previous_token_is_cmd = false;
tokenizer_t tok(subcmd_cstr, TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
for (; tok_has_next(&tok); tok_next(&tok))
const size_t subcmd_cursor_pos = cursor_pos - subcmd_offset;
/* Parse this subcmd */
parse_node_tree_t parse_tree;
parse_t parser;
parser.parse(subcmd, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &parse_tree, NULL);
/* Look for plain statements where the cursor is at the end of the command */
const parse_node_t *matching_cmd_node = NULL;
const size_t len = parse_tree.size();
for (size_t i=0; i < len; i++)
{
size_t tok_pos = static_cast<size_t>(tok_get_pos(&tok));
if (tok_pos > subcmd_tok_begin_offset)
const parse_node_t &node = parse_tree.at(i);
/* Only interested in plain statements with source */
if (node.type != symbol_plain_statement || ! node.has_source())
continue;
/* Skip decorated statements */
if (parse_tree.decoration_for_plain_statement(node) != parse_statement_decoration_none)
continue;
/* Get the command node. Skip it if we can't or it has no source */
const parse_node_t *cmd_node = parse_tree.get_child(node, 0, parse_token_type_string);
if (cmd_node == NULL || ! cmd_node->has_source())
continue;
/* Now see if its source range contains our cursor, including at the end */
if (subcmd_cursor_pos >= cmd_node->source_start && subcmd_cursor_pos <= cmd_node->source_start + cmd_node->source_length)
{
/* We've passed the token we're interested in */
/* Success! */
matching_cmd_node = cmd_node;
break;
}
int last_type = tok_last_type(&tok);
switch (last_type)
{
case TOK_STRING:
{
if (had_cmd)
{
/* Parameter to the command. */
}
else
{
const wcstring potential_cmd = tok_last(&tok);
if (parser_keywords_is_subcommand(potential_cmd))
{
if (potential_cmd == L"command" || potential_cmd == L"builtin")
{
/* 'command' and 'builtin' defeat abbreviation expansion. Skip this command. */
had_cmd = true;
}
else
{
/* Other subcommand. Pretend it doesn't exist so that we can expand the following command */
had_cmd = false;
}
}
else
{
/* It's a normal command */
had_cmd = true;
if (tok_pos == subcmd_tok_begin_offset)
{
/* This is the token we care about! */
previous_token_is_cmd = true;
}
}
}
break;
}
case TOK_REDIRECT_NOCLOB:
case TOK_REDIRECT_OUT:
case TOK_REDIRECT_IN:
case TOK_REDIRECT_APPEND:
case TOK_REDIRECT_FD:
{
if (!had_cmd)
{
break;
}
tok_next(&tok);
break;
}
case TOK_PIPE:
case TOK_BACKGROUND:
case TOK_END:
{
had_cmd = false;
break;
}
case TOK_COMMENT:
case TOK_ERROR:
default:
{
break;
}
}
}
/* Now if we found a command node, expand it */
bool result = false;
if (previous_token_is_cmd)
if (matching_cmd_node != NULL)
{
/* The token is a command. Try expanding it as an abbreviation. */
const wcstring token = wcstring(subcmd, subcmd_tok_begin_offset, subcmd_tok_length);
assert(matching_cmd_node->type == parse_token_type_string);
const wcstring token = matching_cmd_node->get_source(subcmd);
wcstring abbreviation;
if (expand_abbreviation(token, &abbreviation))
{
/* There was an abbreviation! Replace the token in the full command. Maintain the relative position of the cursor. */
if (output != NULL)
{
size_t cmd_tok_begin_offset = subcmd_tok_begin_offset + subcmd_offset;
output->assign(cmdline);
output->replace(cmd_tok_begin_offset, subcmd_tok_length, abbreviation);
output->replace(subcmd_offset + matching_cmd_node->source_start, matching_cmd_node->source_length, abbreviation);
}
result = true;
}