Rewriting indenting functionality to use new parser

This commit is contained in:
ridiculousfish 2013-12-08 13:41:12 -08:00
parent a23441109d
commit dd0cc5ed9f
9 changed files with 328 additions and 38 deletions

View file

@ -64,23 +64,32 @@
#include "parse_util.h" #include "parse_util.h"
static const char * const * s_arguments; static const char * const * s_arguments;
static int s_test_run_count = 0;
/* Indicate if we should test the given function. Either we test everything (all arguments) or we run only tests that have a prefix in s_arguments */ /* Indicate if we should test the given function. Either we test everything (all arguments) or we run only tests that have a prefix in s_arguments */
static bool should_test_function(const char *func_name) static bool should_test_function(const char *func_name)
{ {
/* No args, test everything */ /* No args, test everything */
bool result = false;
if (! s_arguments || ! s_arguments[0]) if (! s_arguments || ! s_arguments[0])
return true;
for (size_t i=0; s_arguments[i] != NULL; i++)
{ {
if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i]))) result = true;
}
else
{
for (size_t i=0; s_arguments[i] != NULL; i++)
{ {
/* Prefix match */ if (! strncmp(func_name, s_arguments[i], strlen(s_arguments[i])))
return true; {
/* Prefix match */
result = true;
break;
}
} }
} }
return false; if (result)
s_test_run_count++;
return result;
} }
/** /**
@ -640,6 +649,147 @@ static void test_parser()
} }
} }
/* Check parse_util_compute_indents() against a table of small sources. Each test case is a list of lines (components) paired with the indent expected for every character of that line; the lines are joined with newlines, the indents are computed for the joined text, and the result is compared character by character. */
static void test_indents()
{
    say(L"Testing indents");

    // Here are the components of our source and the indents we expect those to be
    struct indent_component_t
    {
        const wchar_t *txt;
        int indent;
    };

    // Every table is terminated by an entry whose txt is NULL
    const indent_component_t components1[] =
    {
        {L"if foo", 0},
        {L"end", 0},
        {NULL, -1}
    };

    const indent_component_t components2[] =
    {
        {L"if foo", 0},
        {L"", 1}, //trailing newline!
        {NULL, -1}
    };

    const indent_component_t components3[] =
    {
        {L"if foo", 0},
        {L"foo", 1},
        {L"end", 0},
        {NULL, -1}
    };

    const indent_component_t components4[] =
    {
        {L"if foo", 0},
        {L"if bar", 1},
        {L"end", 1},
        {L"end", 0},
        {L"", 0},
        {NULL, -1}
    };

    const indent_component_t components5[] =
    {
        {L"if foo", 0},
        {L"if bar", 1},
        {L"", 2},
        {NULL, -1}
    };

    const indent_component_t components6[] =
    {
        {L"begin", 0},
        {L"foo", 1},
        {L"", 1},
        {NULL, -1}
    };

    const indent_component_t components7[] =
    {
        {L"begin; end", 0},
        {L"foo", 0},
        {L"", 0},
        {NULL, -1}
    };

    const indent_component_t components8[] =
    {
        {L"if foo", 0},
        {L"if bar", 1},
        {L"baz", 2},
        {L"end", 1},
        {L"", 1},
        {NULL, -1}
    };

    const indent_component_t components9[] =
    {
        {L"switch foo", 0},
        {L"", 1},
        {NULL, -1}
    };

    const indent_component_t components10[] =
    {
        {L"switch foo", 0},
        {L"case bar", 1},
        {L"case baz", 1},
        {L"quux", 2},
        {L"", 2},
        {NULL, -1}
    };

    const indent_component_t *tests[] = {components1, components2, components3, components4, components5, components6, components7, components8, components9, components10};
    for (size_t which = 0; which < sizeof tests / sizeof *tests; which++)
    {
        const indent_component_t *components = tests[which];

        // Count how many we have
        size_t component_count = 0;
        while (components[component_count].txt != NULL)
        {
            component_count++;
        }

        // Generate the expected indents
        wcstring text;
        std::vector<int> expected_indents;
        for (size_t i=0; i < component_count; i++)
        {
            if (i > 0)
            {
                // The newline separating two components takes the indent of the component that follows it
                text.push_back(L'\n');
                expected_indents.push_back(components[i].indent);
            }
            text.append(components[i].txt);
            // Pad the expectations so that every character of this component carries its indent
            expected_indents.resize(text.size(), components[i].indent);
        }
        assert(expected_indents.size() == text.size());

        // Compute the indents
        std::vector<int> indents = parse_util_compute_indents(text);

        if (expected_indents.size() != indents.size())
        {
            // size_t is cast explicitly because %lu expects an unsigned long
            err(L"Indent vector has wrong size! Expected %lu, actual %lu", (unsigned long)expected_indents.size(), (unsigned long)indents.size());
        }
        assert(expected_indents.size() == indents.size());

        for (size_t i=0; i < text.size(); i++)
        {
            if (expected_indents.at(i) != indents.at(i))
            {
                err(L"Wrong indent at index %lu in test #%lu (expected %d, actual %d):\n%ls\n", (unsigned long)i, (unsigned long)(which + 1), expected_indents.at(i), indents.at(i), text.c_str());
                break; //report only the first mismatch of this test case
            }
        }
    }
}
static void test_utils() static void test_utils()
{ {
say(L"Testing utils"); say(L"Testing utils");
@ -2176,25 +2326,26 @@ static void test_new_parser_ll2(void)
} }
} }
__attribute__((unused)) static void test_new_parser_ad_hoc(void)
static void test_new_parser(void)
{ {
say(L"Testing new parser"); /* Very ad-hoc tests for issues encountered */
const wcstring src = L"echo hello world"; say(L"Testing new parser ad hoc tests");
/* Ensure that 'case' terminates a job list */
const wcstring src = L"switch foo ; case bar; case baz; end";
parse_node_tree_t parse_tree; parse_node_tree_t parse_tree;
bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL); bool success = parse_t::parse(src, parse_flag_none, &parse_tree, NULL);
if (! success) if (! success)
{ {
say(L"Parsing failed"); err(L"Parsing failed");
} }
else
/* Expect three case_item_lists: one for each case, and a terminal one. The bug was that we'd try to run a command 'case' */
const parse_node_t &root = parse_tree.at(0);
const parse_node_tree_t::parse_node_list_t node_list = parse_tree.find_nodes(root, symbol_case_item_list);
if (node_list.size() != 3)
{ {
#if 0 err(L"Expected 3 case item nodes, found %lu", node_list.size());
parse_execution_context_t ctx(parse_tree, src);
say(L"Simulating execution:");
wcstring simulation = ctx.simulate();
say(simulation.c_str());
#endif
} }
} }
@ -2415,7 +2566,7 @@ int main(int argc, char **argv)
if (should_test_function("new_parser_ll2")) test_new_parser_ll2(); if (should_test_function("new_parser_ll2")) test_new_parser_ll2();
if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive if (should_test_function("new_parser_fuzzing")) test_new_parser_fuzzing(); //fuzzing is expensive
if (should_test_function("new_parser_correctness")) test_new_parser_correctness(); if (should_test_function("new_parser_correctness")) test_new_parser_correctness();
if (should_test_function("new_parser")) test_new_parser(); if (should_test_function("new_parser_ad_hoc")) test_new_parser_ad_hoc();
if (should_test_function("escape")) test_unescape_sane(); if (should_test_function("escape")) test_unescape_sane();
if (should_test_function("escape")) test_escape_crazy(); if (should_test_function("escape")) test_escape_crazy();
if (should_test_function("format")) test_format(); if (should_test_function("format")) test_format();
@ -2425,6 +2576,7 @@ int main(int argc, char **argv)
if (should_test_function("fork")) test_fork(); if (should_test_function("fork")) test_fork();
if (should_test_function("iothread")) test_iothread(); if (should_test_function("iothread")) test_iothread();
if (should_test_function("parser")) test_parser(); if (should_test_function("parser")) test_parser();
if (should_test_function("indents")) test_indents();
if (should_test_function("utils")) test_utils(); if (should_test_function("utils")) test_utils();
if (should_test_function("escape_sequences")) test_escape_sequences(); if (should_test_function("escape_sequences")) test_escape_sequences();
if (should_test_function("lru")) test_lru(); if (should_test_function("lru")) test_lru();
@ -2447,6 +2599,8 @@ int main(int argc, char **argv)
//history_tests_t::test_history_speed(); //history_tests_t::test_history_speed();
say(L"Encountered %d errors in low-level tests", err_count); say(L"Encountered %d errors in low-level tests", err_count);
if (s_test_run_count == 0)
say(L"*** No Tests Were Actually Run! ***");
/* /*
Skip performance tests for now, since they seem to hang when running from inside make (?) Skip performance tests for now, since they seem to hang when running from inside make (?)

View file

@ -51,6 +51,7 @@ RESOLVE(job_list)
{ {
case parse_keyword_end: case parse_keyword_end:
case parse_keyword_else: case parse_keyword_else:
case parse_keyword_case:
// End this job list // End this job list
return 0; return 0;

View file

@ -240,10 +240,10 @@ static inline parse_token_type_t parse_token_type_from_tokenizer_token(enum toke
} }
/* Helper function for dump_tree */ /* Helper function for dump_tree */
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, size_t start, size_t indent, wcstring *result, size_t *line) static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src, node_offset_t node_idx, size_t indent, wcstring *result, size_t *line)
{ {
assert(start < nodes.size()); assert(node_idx < nodes.size());
const parse_node_t &node = nodes.at(start); const parse_node_t &node = nodes.at(node_idx);
const size_t spacesPerIndent = 2; const size_t spacesPerIndent = 2;
@ -253,26 +253,33 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
if (indent > 0) indent -= 1; if (indent > 0) indent -= 1;
} }
append_format(*result, L"%2lu - %l2u ", *line, start); append_format(*result, L"%2lu - %l2u ", *line, node_idx);
result->append(indent * spacesPerIndent, L' ');; result->append(indent * spacesPerIndent, L' ');;
result->append(node.describe()); result->append(node.describe());
if (node.child_count > 0) if (node.child_count > 0)
{ {
append_format(*result, L" <%lu children>", node.child_count); append_format(*result, L" <%lu children>", node.child_count);
} }
if (node.type == parse_token_type_string)
if (node.has_source() && node.type == parse_token_type_string)
{ {
if (node.source_start == -1) result->append(L": \"");
result->append(src, node.source_start, node.source_length);
result->append(L"\"");
}
if (node.type != parse_token_type_string)
{
if (node.has_source())
{ {
append_format(*result, L" (no source)"); append_format(*result, L" [%ld, %ld]", (long)node.source_start, (long)node.source_length);
} }
else else
{ {
result->append(L": \""); append_format(*result, L" [no src]", (long)node.source_start, (long)node.source_length);
result->append(src, node.source_start, node.source_length);
result->append(L"\"");
} }
} }
result->push_back(L'\n'); result->push_back(L'\n');
++*line; ++*line;
for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++)
@ -658,7 +665,8 @@ bool parse_ll_t::top_node_handle_terminal_types(parse_token_t token)
if (matched) if (matched)
{ {
// Success. Tell the node that it matched this token // Success. Tell the node that it matched this token, and what its source range is
// In the parse phase, we only set source ranges for terminal types. We propagate ranges to parent nodes afterwards.
parse_node_t &node = node_for_top_symbol(); parse_node_t &node = node_for_top_symbol();
node.source_start = token.source_start; node.source_start = token.source_start;
node.source_length = token.source_length; node.source_length = token.source_length;

View file

@ -253,7 +253,6 @@ public:
/* Get the node corresponding to the parent of the given node, or NULL if there is no such parent. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */ /* Get the node corresponding to the parent of the given node, or NULL if there is no such parent. If expected_type is provided, only returns the parent if it is of that type. Note the asymmetry: get_child asserts since the children are known, but get_parent does not, since the parent may not be known. */
const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const; const parse_node_t *get_parent(const parse_node_t &node, parse_token_type_t expected_type = token_type_invalid) const;
/* Find all the nodes of a given type underneath a given node */ /* Find all the nodes of a given type underneath a given node */
typedef std::vector<const parse_node_t *> parse_node_list_t; typedef std::vector<const parse_node_t *> parse_node_list_t;
parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const; parse_node_list_t find_nodes(const parse_node_t &parent, parse_token_type_t type) const;

View file

@ -38,6 +38,7 @@
#include "env.h" #include "env.h"
#include "signal.h" #include "signal.h"
#include "wildcard.h" #include "wildcard.h"
#include "parse_tree.h"
/** /**
Maximum number of autoloaded items of a specific type to keep in Maximum number of autoloaded items of a specific type to keep in
@ -804,3 +805,117 @@ wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote)
} }
return result; return result;
} }
/* We are given a parse tree, the index of a node within the tree, its indent, and a vector of indents the same size as the original source string. Set the indent corresponding to the node's source range, if appropriate.

   trailing_indent is the indent for nodes with unrealized source, i.e. if I type 'if false <ret>' then we have an if node with an empty job list (without source) but we want the last line to be indented anyways.

   switch statements also indent.

   Parameters:
     tree            the parse tree to walk
     node_idx        index of the node to visit; out-of-range indexes are ignored
     node_indent     indent inherited from the parent, before this node's own adjustment
     parent_type     type of the parent node, used to detect the first list of a chain
     indents         output: the entry at each visited node's source_start receives that node's indent
     trailing_indent output: the indent to use for trailing whitespace in the source
*/
static void compute_indents_recursive(const parse_node_tree_t &tree, node_offset_t node_idx, int node_indent, parse_token_type_t parent_type, std::vector<int> *indents, int *trailing_indent)
{
    /* Guard against incomplete trees. An index equal to tree.size() is already out of range (this includes index 0 of an empty tree), so the comparison must be >= and not > */
    if (node_idx >= tree.size())
        return;

    /* We could implement this by utilizing the fish grammar. But there's an easy trick instead: almost everything that wraps a job list should be indented by 1. So just find all of the job lists. One exception is switch; the other exception is job_list itself: a job_list is a job and a job_list, and we want that child list to be indented the same as the parent. So just find all job_lists whose parent is not a job_list, and increment their indent by 1. */
    const parse_node_t &node = tree.at(node_idx);
    const parse_token_type_t node_type = node.type;

    /* Increment the indent if we are either a root job_list, or root case_item_list */
    const bool is_root_job_list = (node_type == symbol_job_list && parent_type != symbol_job_list);
    const bool is_root_case_item_list = (node_type == symbol_case_item_list && parent_type != symbol_case_item_list);
    if (is_root_job_list || is_root_case_item_list)
    {
        node_indent += 1;
    }

    /* If we have source, store the trailing indent unconditionally. If we do not have source, store the trailing indent only if ours is bigger; this prevents the trailing "run" of terminal job lists from affecting the trailing indent. For example, code like this:

           if foo

       will be parsed as this:

           job_list
             job
               if_statement
                 job [if]
                 job_list [empty]
             job_list [empty]

       There's two "terminal" job lists, and we want the innermost one.

       Note we are relying on the fact that nodes are in the same order as the source, i.e. an in-order traversal of the node tree also traverses the source from beginning to end.
    */
    if (node.has_source() || node_indent > *trailing_indent)
    {
        *trailing_indent = node_indent;
    }

    /* Store the indent into the indent array. Only the first character of the node's source is marked here */
    if (node.has_source())
    {
        assert(node.source_start < indents->size());
        indents->at(node.source_start) = node_indent;
    }

    /* Recurse to all our children */
    for (node_offset_t idx = 0; idx < node.child_count; idx++)
    {
        /* Note we pass our type to our child, which becomes its parent node type */
        compute_indents_recursive(tree, node.child_start + idx, node_indent, node_type, indents, trailing_indent);
    }
}
/* Parse src as fish code and return a vector with one indent level per character of src. */
std::vector<int> parse_util_compute_indents(const wcstring &src)
{
    /* The characters across which an indent level is spread */
    const wchar_t * const whitespace = L" \n\t\r";

    /* One slot per source character; -1 means "no indent recorded yet" */
    const size_t len = src.size();
    std::vector<int> indents(len, -1);

    /* Build the tree; errors are not collected */
    parse_node_tree_t tree;
    parse_t::parse(src, parse_flag_continue_after_error | parse_flag_accept_incomplete_tokens, &tree, NULL /* errors */);

    /* Receives the indent that the last line should get */
    int trailing_indent = 0;

    /* Walk the tree from node 0 at indent 0. As a hack, job_list is passed as the 'parent' type so that the outermost job list is not itself indented */
    compute_indents_recursive(tree, 0 /* node index */, 0 /* current indent */, symbol_job_list, &indents, &trailing_indent);

    /* Fill in every slot the walk left unset by carrying the most recent recorded level forward */
    int current_indent = 0;
    for (size_t pos = 0; pos < len; pos++)
    {
        const int recorded = indents.at(pos);
        if (recorded >= 0)
        {
            /* A new level starts here */
            current_indent = recorded;

            /* Walk backwards over the whitespace in front of this position and give it the new level too, so that a line starting with whitespace does not keep the previous level */
            for (size_t back = pos; back > 0 && wcschr(whitespace, src.at(back - 1)) != NULL; back--)
            {
                indents.at(back - 1) = current_indent;
            }
        }
        else
        {
            indents.at(pos) = current_indent;
        }
    }

    /* Trailing whitespace gets the trailing indent, so that a new line is indented correctly even when it is empty */
    for (size_t tail = len; tail > 0 && wcschr(whitespace, src.at(tail - 1)) != NULL; tail--)
    {
        indents.at(tail - 1) = trailing_indent;
    }

    return indents;
}

View file

@ -159,5 +159,7 @@ void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_
*/ */
wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote); wcstring parse_util_escape_string_with_quote(const wcstring &cmd, wchar_t quote);
/** Given a string, parse it as fish code and then return the indents. The return value has the same size as the string */
std::vector<int> parse_util_compute_indents(const wcstring &src);
#endif #endif

View file

@ -2911,7 +2911,7 @@ struct block_info_t
bool has_had_case; //if we are a switch, whether we've encountered a case bool has_had_case; //if we are a switch, whether we've encountered a case
}; };
int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix) parser_test_error_bits_t parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const wchar_t *prefix)
{ {
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
@ -2926,7 +2926,6 @@ int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const w
// These are very nearly stacks, but sometimes we have to inspect non-top elements (e.g. return) // These are very nearly stacks, but sometimes we have to inspect non-top elements (e.g. return)
std::vector<struct block_info_t> block_infos; std::vector<struct block_info_t> block_infos;
int indentation_sum = 0; //sum of indentation in block_infos int indentation_sum = 0; //sum of indentation in block_infos
int res = 0;
/* /*
Set to 1 if the current command is inside a pipeline Set to 1 if the current command is inside a pipeline
@ -3704,6 +3703,8 @@ int parser_t::test(const wchar_t *buff, int *block_level, wcstring *out, const w
if (! block_infos.empty()) if (! block_infos.empty())
unfinished = 1; unfinished = 1;
parser_test_error_bits_t res = 0;
if (err) if (err)
res |= PARSER_TEST_ERROR; res |= PARSER_TEST_ERROR;

View file

@ -13,8 +13,11 @@
#include "function.h" #include "function.h"
#include <vector> #include <vector>
#define PARSER_TEST_ERROR 1 enum {
#define PARSER_TEST_INCOMPLETE 2 PARSER_TEST_ERROR = 1,
PARSER_TEST_INCOMPLETE = 2
};
typedef unsigned int parser_test_error_bits_t;
/** /**
event_blockage_t represents a block on events of the specified type event_blockage_t represents a block on events of the specified type
@ -484,7 +487,7 @@ public:
\param out if non-null, any errors in the command will be filled out into this buffer \param out if non-null, any errors in the command will be filled out into this buffer
\param prefix the prefix string to prepend to each error message written to the \c out buffer \param prefix the prefix string to prepend to each error message written to the \c out buffer
*/ */
int test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL); parser_test_error_bits_t test(const wchar_t * buff, int *block_level = NULL, wcstring *out = NULL, const wchar_t *prefix = NULL);
/** /**
Test if the specified string can be parsed as an argument list, Test if the specified string can be parsed as an argument list,

View file

@ -519,7 +519,14 @@ wcstring combine_command_and_autosuggestion(const wcstring &cmdline, const wcstr
static void reader_repaint() static void reader_repaint()
{ {
// Update the indentation // Update the indentation
parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]); if (0)
{
parser_t::principal_parser().test(data->command_line.c_str(), &data->indents[0]);
}
else
{
data->indents = parse_util_compute_indents(data->command_line);
}
// Combine the command and autosuggestion into one string // Combine the command and autosuggestion into one string
wcstring full_line = combine_command_and_autosuggestion(data->command_line, data->autosuggestion); wcstring full_line = combine_command_and_autosuggestion(data->command_line, data->autosuggestion);