From bc66921ac9c10bf6a36e2b9b9f9dd83347b1d3e6 Mon Sep 17 00:00:00 2001 From: Mahmoud Al-Qudsi Date: Wed, 3 Apr 2019 20:38:29 -0500 Subject: [PATCH] Optimize keyword detection The data stored in these containers is small enough that it is worth creating distinct sets for each lookup. In a microbenchmark of these changes, the single-lookup version of the function with lookups gated on the length of input (bypassed entirely if the input is longer than the longest key in the container) provided a 1.5x-3.5x speedup over the previous implementation. Additionally, as the collections are static and their contents are never modified after startup, it makes no sense to continuously calculate the location of and allocate an iterator for the `!= foo.end()` comparison; the end iterator is now statically cached. I'm not expecting massive speed gains out of this change, but the parser does perform enough of these to make it worth optimizing in this way. --- src/parser_keywords.cpp | 84 ++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 14 deletions(-) diff --git a/src/parser_keywords.cpp b/src/parser_keywords.cpp index 4711d178d..7f1bdecc7 100644 --- a/src/parser_keywords.cpp +++ b/src/parser_keywords.cpp @@ -8,26 +8,82 @@ #include "fallback.h" // IWYU pragma: keep #include "parser_keywords.h" +typedef std::unordered_set string_set_t; + +static const wcstring skip_keywords[] { + L"else", + L"begin", +}; + +static const wcstring subcommand_keywords[] { + L"command", L"builtin", L"while", L"exec", + L"if", L"and", L"or", L"not" +}; + +static const string_set_t block_keywords = { + L"for", L"while", L"if", + L"function", L"switch", L"begin" +}; + +static const wcstring reserved_keywords[] = { + L"end", L"case", L"else", L"return", + L"continue", L"break", L"argparse", L"read", + L"set", L"status", L"test", L"[" +}; + +// The lists above are purposely implemented separately from the logic below, so that future +// maintainers may assume the contents of the 
list based off their names, and not off what the +// functions below require them to contain. + +static size_t list_max_length(const string_set_t &list) { + size_t result = 0; + for (const auto &w: list) { + if (w.length() > result) { + result = w.length(); + } + } + return result; +} + bool parser_keywords_skip_arguments(const wcstring &cmd) { - static const wcstring el = L"else"; - static const wcstring beg = L"begin"; - return cmd == el || cmd == beg; + return cmd == skip_keywords[0] || cmd == skip_keywords[1]; } -static const std::unordered_set subcommand_keywords = {L"command", L"builtin", L"while", L"exec", - L"if", L"and", L"or", L"not"}; bool parser_keywords_is_subcommand(const wcstring &cmd) { - return parser_keywords_skip_arguments(cmd) || contains(subcommand_keywords, cmd); + const static string_set_t search_list = ([](){ + string_set_t results; + results.insert(std::begin(subcommand_keywords), std::end(subcommand_keywords)); + results.insert(std::begin(skip_keywords), std::end(skip_keywords)); + return results; + })(); + + const static auto max_len = list_max_length(search_list); + const static auto not_found = search_list.end(); + + // Everything above is executed only at startup, this is the actual optimized search routine: + return cmd.length() <= max_len && search_list.find(cmd) != not_found; } -static const std::unordered_set block_keywords = {L"for", L"while", L"if", - L"function", L"switch", L"begin"}; -bool parser_keywords_is_block(const wcstring &word) { return contains(block_keywords, word); } +bool parser_keywords_is_block(const wcstring &word) { + const static auto max_len = list_max_length(block_keywords); + const static auto not_found = block_keywords.end(); + + // Everything above is executed only at startup, this is the actual optimized search routine: + return word.length() <= max_len && block_keywords.find(word) != not_found; +} -static const std::unordered_set reserved_keywords = {L"end", L"case", L"else", L"return", - L"continue", 
L"break", L"argparse", L"read", - L"set", L"status", L"test", L"["}; bool parser_keywords_is_reserved(const wcstring &word) { - return parser_keywords_is_block(word) || parser_keywords_is_subcommand(word) || - contains(reserved_keywords, word); + const static string_set_t search_list = ([](){ + string_set_t results; + results.insert(std::begin(subcommand_keywords), std::end(subcommand_keywords)); + results.insert(std::begin(skip_keywords), std::end(skip_keywords)); + results.insert(std::begin(block_keywords), std::end(block_keywords)); + results.insert(std::begin(reserved_keywords), std::end(reserved_keywords)); + return results; + })(); + const static auto max_len = list_max_length(search_list); + const static auto not_found = search_list.end(); + + // Everything above is executed only at startup, this is the actual optimized search routine: + return word.length() <= max_len && search_list.find(word) != not_found; }