mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-25 20:33:08 +00:00
Rewriting indenting functionality to use new parser
This commit is contained in:
parent
a23441109d
commit
dd0cc5ed9f
9 changed files with 328 additions and 38 deletions
194
fish_tests.cpp
194
fish_tests.cpp
|
@ -64,23 +64,32 @@
|
|||
#include "parse_util.h"
|
||||
|
||||
static const char * const * s_arguments;
|
||||
static int s_test_run_count = 0;
|
||||
|
||||
/* Indicate if we should test the given function. Either we test everything (all arguments) or we run only tests that have a prefix in s_arguments */
|
||||
static bool should_test_function(const char *func_name)
|
||||
{
|
||||
/* No args, test everything */
|
||||
bool result = false;
|
||||
if (! s_arguments || ! s_arguments[0])
|
||||
return true;
|
||||
|
||||
for (size_t i=0; s_arguments[i] != NULL; i++)
|
||||
{
|
||||
if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i])))
|
||||
result = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i=0; s_arguments[i] != NULL; i++)
|
||||
{
|
||||
/* Prefix match */
|
||||
return true;
|
||||
if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i])))
|
||||
{
|
||||
/* Prefix match */
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
if (result)
|
||||
s_test_run_count++;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -640,6 +649,147 @@ static void test_parser()
|
|||
}
|
||||
}
|
||||
|
||||
static void test_indents()
|
||||
{
|
||||
say(L"Testing indents");
|
||||
|
||||
// Here are the components of our source and the indents we expect those to be
|
||||
struct indent_component_t {
|
||||
const wchar_t *txt;
|
||||
int indent;
|
||||
};
|
||||
|
||||
const indent_component_t components1[] =
|
||||
{
|
||||
{L"if foo", 0},
|
||||
{L"end", 0},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const indent_component_t components2[] =
|
||||
{
|
||||
{L"if foo", 0},
|
||||
{L"", 1}, //trailing newline!
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const indent_component_t components3[] =
|
||||
{
|
||||
{L"if foo", 0},
|
||||
{L"foo", 1},
|
||||
{L"end", 0}, //trailing newline!
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const indent_component_t components4[] =
|
||||
{
|
||||
{L"if foo", 0},
|
||||
{L"if bar", 1},
|
||||
{L"end", 1},
|
||||
{L"end", 0},
|
||||
{L"", 0},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const indent_component_t components5[] =
|
||||
{
|
||||
{L"if foo", 0},
|
||||
{L"if bar", 1},
|
||||
{L"", 2},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const indent_component_t components6[] =
|
||||
{
|
||||
{L"begin", 0},
|
||||
{L"foo", 1},
|
||||
{L"", 1},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const indent_component_t components7[] =
|
||||
{
|
||||
{L"begin; end", 0},
|
||||
{L"foo", 0},
|
||||
{L"", 0},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const indent_component_t components8[] =
|
||||
{
|
||||
{L"if foo", 0},
|
||||
{L"if bar", 1},
|
||||
{L"baz", 2},
|
||||
{L"end", 1},
|
||||
{L"", 1},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const indent_component_t components9[] =
|
||||
{
|
||||
{L"switch foo", 0},
|
||||
{L"", 1},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
const indent_component_t components10[] =
|
||||
{
|
||||
{L"switch foo", 0},
|
||||
{L"case bar", 1},
|
||||
{L"case baz", 1},
|
||||
{L"quux", 2},
|
||||
{L"", 2},
|
||||
{NULL, -1}
|
||||
};
|
||||
|
||||
|
||||
|
||||
const indent_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10};
|
||||
for (size_t which = 0; which < sizeof tests / sizeof *tests; which++)
|
||||
{
|
||||
const indent_component_t *components = tests[which];
|
||||
// Count how many we have
|
||||
size_t component_count = 0;
|
||||
while (components[component_count].txt != NULL)
|
||||
{
|
||||
component_count++;
|
||||
}
|
||||
|
||||
// Generate the expected indents
|
||||
wcstring text;
|
||||
std::vector<int> expected_indents;
|
||||
for (size_t i=0; i < component_count; i++)
|
||||
{
|
||||
if (i > 0)
|
||||
{
|
||||
text.push_back(L'\n');
|
||||
expected_indents.push_back(components[i].indent);
|
||||
}
|
||||
text.append(components[i].txt);
|
||||
expected_indents.resize(text.size(), components[i].indent);
|
||||
}
|
||||
assert(expected_indents.size() == text.size());
|
||||
|
||||
// Compute the indents
|
||||
std::vector<int> indents = parse_util_compute_indents(text);
|
||||
|
||||
if (expected_indents.size() != indents.size())
|
||||
{
|
||||
err(L"Indent vector has wrong size! Expected %lu, actual %lu", expected_indents.size(), indents.size());
|
||||
}
|
||||
assert(expected_indents.size() == indents.size());
|
||||
for (size_t i=0; i < text.size(); i++)
|
||||
{
|
||||
if (expected_indents.at(i) != indents.at(i))
|
||||
{
|
||||
err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", i, which + 1, expected_indents.at(i), indents.at(i), text.c_str());
|
||||
break; //don't keep showing errors for the rest of the line
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void test_utils()
|
||||
{
|
||||
say(L"Testing utils");
|
||||
|
@ -2176,25 +2326,26 @@ static void test_new_parser_ll2(void)
|
|||
}
|
||||
}
|
||||
|
||||
__attribute__((unused))
|
||||
static void test_new_parser(void)
|
||||
static void test_new_parser_ad_hoc(void)
|
||||
{
|
||||
say(L"Testing new parser");
|
||||
const wcstring src = L"echo hello world";
|
||||
/* Very ad-hoc tests for issues encountered */
|
||||
say(L"Testing new parser ad hoc tests");
|
||||
|
||||
/* Ensure that 'case' terminates a job list */
|
||||
const wcstring src = L"switch foo ; case bar; case baz; end";
|
||||
parse_node_tree_t parse_tree;
|
||||
bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL);
|
||||
if (! success)
|
||||
{
|
||||
say(L"Parsing failed");
|
||||
err(L"Parsing failed");
|
||||
}
|
||||
else
|
||||
|
||||
/* Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd try to run a command 'case' */
|
||||
const parse_node_t &root = parse_tree.at(0);
|
||||
const parse_node_tree_t::parse_node_list_t node_list = parse_tree.find_nodes(root, symbol_case_item_list);
|
||||
if (node_list.size() != 3)
|
||||
{
|
||||
#if 0
|
||||
parse_execution_context_t ctx(parse_tree, src);
|
||||
say(L"Simulating execution:");
|
||||
wcstring simulation = ctx.simulate();
|
||||
say(simulation.c_str());
|
||||
#endif
|
||||
err(L"Expected 3 case item nodes, found %lu", node_list.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2415,7 +2566,7 @@ int main(int argc, char **argv)
|
|||
if (should_test_function("new_parser_ll2")) test_new_parser_ll2();
|
||||
if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive
|
||||
if (should_test_function("new_parser_correctness")) test_new_parser_correctness();
|
||||
if (should_test_function("new_parser")) test_new_parser();
|
||||
if (should_test_function("new_parser_ad_hoc")) test_new_parser_ad_hoc();
|
||||
if (should_test_function("escape")) test_unescape_sane();
|
||||
if (should_test_function("escape")) test_escape_crazy();
|
||||
if (should_test_function("format")) test_format();
|
||||
|
@ -2425,6 +2576,7 @@ int main(int argc, char **argv)
|
|||
if (should_test_function("fork")) test_fork();
|
||||
if (should_test_function("iothread")) test_iothread();
|
||||
if (should_test_function("parser")) test_parser();
|
||||
if (should_test_function("indents")) test_indents();
|
||||
if (should_test_function("utils")) test_utils();
|
||||
if (should_test_function("escape_sequences")) test_escape_sequences();
|
||||
if (should_test_function("lru")) test_lru();
|
||||
|
@ -2447,6 +2599,8 @@ int main(int argc, char **argv)
|
|||
//history_tests_t::test_history_speed();
|
||||
|
||||
say(L"Encountered %d errors in low-level tests", err_count);
|
||||
if (s_test_run_count == 0)
|
||||
say(L"*** No Tests Were Actually Run! ***");
|
||||
|
||||
/*
|
||||
Skip performance tests for now, since they seem to hang when running from inside make (?)
|
||||
|
|
|
@ -51,6 +51,7 @@ RESOLVE(job_list)
|
|||
{
|
||||
case parse_keyword_end:
|
||||
case parse_keyword_else:
|
||||
case parse_keyword_case:
|
||||
// End this job list
|
||||
return 0;
|
||||
|
||||
|
|
|
@ -240,10 +240,10 @@ static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum toke
|
|||
}
|
||||
|
||||
/* Helper function for dump_tree */
|
||||
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line)
|
||||
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, node_offset_t node_idx, size_t indent, wcstring *result, size_t *line)
|
||||
{
|
||||
assert(start < nodes.size());
|
||||
const parse_node_t &node = nodes.at(start);
|
||||
assert(node_idx < nodes.size());
|
||||
const parse_node_t &node = nodes.at(node_idx);
|
||||
|
||||
const size_t spacesPerIndent = 2;
|
||||
|
||||
|
@ -253,26 +253,33 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
|
|||
if (indent > 0) indent -= 1;
|
||||
}
|
||||
|
||||
append_format(*result, L"%2lu - %l2u ", *line, start);
|
||||
append_format(*result, L"%2lu - %l2u ", *line, node_idx);
|
||||
result->append(indent * spacesPerIndent, L' ');;
|
||||
result->append(node.describe());
|
||||
if (node.child_count > 0)
|
||||
{
|
||||
append_format(*result, L" <%lu children>", node.child_count);
|
||||
}
|
||||
if (node.type == parse_token_type_string)
|
||||
|
||||
if (node.has_source() && node.type == parse_token_type_string)
|
||||
{
|
||||
if (node.source_start == -1)
|
||||
result->append(L": \"");
|
||||
result->append(src, node.source_start, node.source_length);
|
||||
result->append(L"\"");
|
||||
}
|
||||
|
||||
if (node.type != parse_token_type_string)
|
||||
{
|
||||
if (node.has_source())
|
||||
{
|
||||
append_format(*result, L" (no source)");
|
||||
append_format(*result, L" [%ld, %ld]", (long)node.source_start, (long)node.source_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
result->append(L": \"");
|
||||
result->append(src, node.source_start, node.source_length);
|
||||
result->append(L"\"");
|
||||
append_format(*result, L" [no src]", (long)node.source_start, (long)node.source_length);
|
||||
}
|
||||
}
|
||||
|
||||
result->push_back(L'\n');
|
||||
++*line;
|
||||
for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++)
|
||||
|
@ -658,7 +665,8 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token)
|
|||
|
||||
if (matched)
|
||||
{
|
||||
// Success. Tell the node that it matched this token
|
||||
// Success. Tell the node that it matched this token, and what its source range is
|
||||
// In the parse phase, we only set source ranges for terminal types. We propagate ranges to parent nodes afterwards.
|
||||
parse_node_t &node = node_for_top_symbol();
|
||||
node.source_start = token.source_start;
|
||||
node.source_length = token.source_length;
|
||||
|
|
|
@ -253,7 +253,6 @@ public:
|
|||
/* Get the node corresponding to the parent of the given node, or NULL if there is no such child. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */
|
||||
const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const;
|
||||
|
||||
|
||||
/* Find all the nodes of a given type underneath a given node */
|
||||
typedef std::vector<const parse_node_t *> parse_node_list_t;
|
||||
parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const;
|
||||
|
|
115
parse_util.cpp
115
parse_util.cpp
|
@ -38,6 +38,7 @@
|
|||
#include "env.h"
|
||||
#include "signal.h"
|
||||
#include "wildcard.h"
|
||||
#include "parse_tree.h"
|
||||
|
||||
/**
|
||||
Maximum number of autoloaded items opf a specific type to keep in
|
||||
|
@ -804,3 +805,117 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote)
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* We are given a parse tree, the index of a node within the tree, its indent, and a vector of indents the same size as the original source string. Set the indent correspdonding to the node's source range, if appropriate.
|
||||
|
||||
trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false <ret>' then we have an if node with an empty job list (without source) but we want the last line to be indented anyways.
|
||||
|
||||
switch statements also indent.
|
||||
*/
|
||||
static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx, int node_indent, parse_token_type_t parent_type, std::vector<int> *indents, int *trailing_indent)
|
||||
{
|
||||
/* Guard against incomplete trees */
|
||||
if (node_idx > tree.size())
|
||||
return;
|
||||
|
||||
/* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch; the other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. */
|
||||
|
||||
const parse_node_t &node = tree.at(node_idx);
|
||||
const parse_token_type_t node_type = node.type;
|
||||
|
||||
/* Increment the indent if we are either a root job_list, or root case_item_list */
|
||||
const bool is_root_job_list = (node_type == symbol_job_list && parent_type != symbol_job_list);
|
||||
const bool is_root_case_item_list = (node_type == symbol_case_item_list && parent_type != symbol_case_item_list);
|
||||
if (is_root_job_list || is_root_case_item_list)
|
||||
{
|
||||
node_indent += 1;
|
||||
}
|
||||
|
||||
/* If we have source, store the trailing indent unconditionally. If we do not have source, store the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job lists from affecting the trailing indent. For example, code like this:
|
||||
|
||||
if foo
|
||||
|
||||
will be parsed as this:
|
||||
|
||||
job_list
|
||||
job
|
||||
if_statement
|
||||
job [if]
|
||||
job_list [empty]
|
||||
job_list [empty]
|
||||
|
||||
There's two "terminal" job lists, and we want the innermost one.
|
||||
|
||||
Note we are relying on the fact that nodes are in the same order as the source, i.e. an in-order traversal of the node tree also traverses the source from beginning to end.
|
||||
*/
|
||||
if (node.has_source() || node_indent > *trailing_indent)
|
||||
{
|
||||
*trailing_indent = node_indent;
|
||||
}
|
||||
|
||||
|
||||
/* Store the indent into the indent array */
|
||||
if (node.has_source())
|
||||
{
|
||||
assert(node.source_start < indents->size());
|
||||
indents->at(node.source_start) = node_indent;
|
||||
}
|
||||
|
||||
|
||||
/* Recursive to all our children */
|
||||
for (node_offset_t idx = 0; idx < node.child_count; idx++)
|
||||
{
|
||||
/* Note we pass our type to our child, which becomes its parent node type */
|
||||
compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents, trailing_indent);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> parse_util_compute_indents(const wcstring &src)
|
||||
{
|
||||
/* Make a vector the same size as the input string, which contains the indents. Initialize them to -1. */
|
||||
const size_t src_size = src.size();
|
||||
std::vector<int> indents(src_size, -1);
|
||||
|
||||
parse_node_tree_t tree;
|
||||
parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);
|
||||
|
||||
/* The indent that we'll get for the last line */
|
||||
int trailing_indent = 0;
|
||||
|
||||
/* Invoke the recursive version. As a hack, pass job_list for the 'parent' token, which will prevent the really-root job list from indenting */
|
||||
compute_indents_recursive(tree, 0 /* node index */, 0/* current indent */, symbol_job_list, &indents, &trailing_indent);
|
||||
|
||||
int last_indent = 0;
|
||||
for (size_t i=0; i<src_size; i++)
|
||||
{
|
||||
int this_indent = indents.at(i);
|
||||
if (this_indent < 0)
|
||||
{
|
||||
indents.at(i) = last_indent;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* New indent level */
|
||||
last_indent = this_indent;
|
||||
/* Make all whitespace before a token have the new level. This avoid using the wrong indentation level if a new line starts with whitespace. */
|
||||
size_t prev_char_idx = i;
|
||||
while (prev_char_idx--)
|
||||
{
|
||||
if (!wcschr(L" \n\t\r", src.at(prev_char_idx)))
|
||||
break;
|
||||
indents.at(prev_char_idx) = last_indent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Ensure trailing whitespace has the trailing indent. This makes sure a new line is correctly indented even if it is empty. */
|
||||
size_t suffix_idx = src_size;
|
||||
while (suffix_idx--)
|
||||
{
|
||||
if (!wcschr(L" \n\t\r", src.at(suffix_idx)))
|
||||
break;
|
||||
indents.at(suffix_idx) = trailing_indent;
|
||||
}
|
||||
|
||||
return indents;
|
||||
}
|
||||
|
|
|
@ -159,5 +159,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
|
|||
*/
|
||||
wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote);
|
||||
|
||||
/** Given a string, parse it as fish code and then return the indents. The return value has the same size as the string */
|
||||
std::vector<int> parse_util_compute_indents(const wcstring &src);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -2911,7 +2911,7 @@ struct block_info_t
|
|||
bool has_had_case; //if we are a switch, whether we've encountered a case
|
||||
};
|
||||
|
||||
int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix)
|
||||
parser_test_error_bits_t parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix)
|
||||
{
|
||||
ASSERT_IS_MAIN_THREAD();
|
||||
|
||||
|
@ -2926,7 +2926,6 @@ int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const w
|
|||
// These are very nearly stacks, but sometimes we have to inspect non-top elements (e.g. return)
|
||||
std::vector<struct block_info_t> block_infos;
|
||||
int indentation_sum = 0; //sum of indentation in block_infos
|
||||
int res = 0;
|
||||
|
||||
/*
|
||||
Set to 1 if the current command is inside a pipeline
|
||||
|
@ -3704,6 +3703,8 @@ int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const w
|
|||
if (! block_infos.empty())
|
||||
unfinished = 1;
|
||||
|
||||
parser_test_error_bits_t res = 0;
|
||||
|
||||
if (err)
|
||||
res |= PARSER_TEST_ERROR;
|
||||
|
||||
|
|
9
parser.h
9
parser.h
|
@ -13,8 +13,11 @@
|
|||
#include "function.h"
|
||||
#include <vector>
|
||||
|
||||
#define PARSER_TEST_ERROR 1
|
||||
#define PARSER_TEST_INCOMPLETE 2
|
||||
enum {
|
||||
PARSER_TEST_ERROR = 1,
|
||||
PARSER_TEST_INCOMPLETE = 2
|
||||
};
|
||||
typedef unsigned int parser_test_error_bits_t;
|
||||
|
||||
/**
|
||||
event_blockage_t represents a block on events of the specified type
|
||||
|
@ -484,7 +487,7 @@ public:
|
|||
\param out if non-null, any errors in the command will be filled out into this buffer
|
||||
\param prefix the prefix string to prepend to each error message written to the \c out buffer
|
||||
*/
|
||||
int test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL);
|
||||
parser_test_error_bits_t test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL);
|
||||
|
||||
/**
|
||||
Test if the specified string can be parsed as an argument list,
|
||||
|
|
|
@ -519,7 +519,14 @@ wcstring combine_command_and_autosuggestion(const wcstring &cmdline, const wcstr
|
|||
static void reader_repaint()
|
||||
{
|
||||
// Update the indentation
|
||||
parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]);
|
||||
if (0)
|
||||
{
|
||||
parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
data->indents = parse_util_compute_indents(data->command_line);
|
||||
}
|
||||
|
||||
// Combine the command and autosuggestion into one string
|
||||
wcstring full_line = combine_command_and_autosuggestion(data->command_line, data->autosuggestion);
|
||||
|
|
Loading…
Reference in a new issue