Optimize keyword detection

The data stored in these containers is small enough that it is worth
creating distinct sets for each lookup.

In a microbenchmark of these changes, the single-lookup version of the
function with lookups gated on the length of input (bypassed entirely if
the input is longer than the longest key in the container) provided a
1.5x-3.5x speedup over the previous implementation.

Additionally, as the collections are static and their contents are never
modified after startup, it makes no sense to continuously calculate the
location of and allocate an iterator for the `!= foo.end()` comparison;
the end iterator is now statically cached.

I'm not expecting massive speed gains out of this change, but the parser
does perform enough of these to make it worth optimizing in this way.
This commit is contained in:
Mahmoud Al-Qudsi 2019-04-03 20:38:29 -05:00
parent e2ed6baf43
commit bc66921ac9

View file

@ -8,26 +8,82 @@
#include "fallback.h" // IWYU pragma: keep #include "fallback.h" // IWYU pragma: keep
#include "parser_keywords.h" #include "parser_keywords.h"
typedef std::unordered_set<wcstring> string_set_t;
static const wcstring skip_keywords[] {
L"else",
L"begin",
};
static const wcstring subcommand_keywords[] {
L"command", L"builtin", L"while", L"exec",
L"if", L"and", L"or", L"not"
};
static const string_set_t block_keywords = {
L"for", L"while", L"if",
L"function", L"switch", L"begin"
};
static const wcstring reserved_keywords[] = {
L"end", L"case", L"else", L"return",
L"continue", L"break", L"argparse", L"read",
L"set", L"status", L"test", L"["
};
// The lists above are purposely implemented separately from the logic below, so that future
// maintainers may assume the contents of each list based on its name, and not on what the
// functions below require it to contain.
static size_t list_max_length(const string_set_t &list) {
size_t result = 0;
for (const auto &w: list) {
if (w.length() > result) {
result = w.length();
}
}
return result;
}
bool parser_keywords_skip_arguments(const wcstring &cmd) { bool parser_keywords_skip_arguments(const wcstring &cmd) {
static const wcstring el = L"else"; return cmd == skip_keywords[0] || cmd == skip_keywords[1];
static const wcstring beg = L"begin";
return cmd == el || cmd == beg;
} }
static const std::unordered_set<wcstring> subcommand_keywords = {L"command", L"builtin", L"while", L"exec",
L"if", L"and", L"or", L"not"};
bool parser_keywords_is_subcommand(const wcstring &cmd) { bool parser_keywords_is_subcommand(const wcstring &cmd) {
return parser_keywords_skip_arguments(cmd) || contains(subcommand_keywords, cmd); const static string_set_t search_list = ([](){
string_set_t results;
results.insert(std::begin(subcommand_keywords), std::end(subcommand_keywords));
results.insert(std::begin(skip_keywords), std::end(skip_keywords));
return results;
})();
const static auto max_len = list_max_length(search_list);
const static auto not_found = search_list.end();
// Everything above is executed only at startup, this is the actual optimized search routine:
return cmd.length() <= max_len && search_list.find(cmd) != not_found;
} }
static const std::unordered_set<wcstring> block_keywords = {L"for", L"while", L"if", bool parser_keywords_is_block(const wcstring &word) {
L"function", L"switch", L"begin"}; const static auto max_len = list_max_length(block_keywords);
bool parser_keywords_is_block(const wcstring &word) { return contains(block_keywords, word); } const static auto not_found = block_keywords.end();
// Everything above is executed only at startup, this is the actual optimized search routine:
return word.length() <= max_len && block_keywords.find(word) != not_found;
}
static const std::unordered_set<wcstring> reserved_keywords = {L"end", L"case", L"else", L"return",
L"continue", L"break", L"argparse", L"read",
L"set", L"status", L"test", L"["};
bool parser_keywords_is_reserved(const wcstring &word) { bool parser_keywords_is_reserved(const wcstring &word) {
return parser_keywords_is_block(word) || parser_keywords_is_subcommand(word) || const static string_set_t search_list = ([](){
contains(reserved_keywords, word); string_set_t results;
results.insert(std::begin(subcommand_keywords), std::end(subcommand_keywords));
results.insert(std::begin(skip_keywords), std::end(skip_keywords));
results.insert(std::begin(block_keywords), std::end(block_keywords));
results.insert(std::begin(reserved_keywords), std::end(reserved_keywords));
return results;
})();
const static auto max_len = list_max_length(search_list);
const static auto not_found = search_list.end();
// Everything above is executed only at startup, this is the actual optimized search routine:
return word.length() <= max_len && search_list.find(word) != not_found;
} }