Use binary search to determine what tokens are keywords

This commit is contained in:
ridiculousfish 2014-10-15 12:04:23 -07:00
parent 1927ebbc5d
commit ff7108877b
2 changed files with 45 additions and 32 deletions

View file

@ -77,27 +77,27 @@ enum parse_token_type_t
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
} __packed; } __packed;
/* These must be maintained in sorted order (except for none, which isn't a keyword). This enables us to do binary search. */
enum parse_keyword_t enum parse_keyword_t
{ {
parse_keyword_none, parse_keyword_none,
parse_keyword_if,
parse_keyword_else,
parse_keyword_for,
parse_keyword_in,
parse_keyword_while,
parse_keyword_begin,
parse_keyword_function,
parse_keyword_switch,
parse_keyword_case,
parse_keyword_end,
parse_keyword_and, parse_keyword_and,
parse_keyword_or, parse_keyword_begin,
parse_keyword_not,
parse_keyword_command,
parse_keyword_builtin, parse_keyword_builtin,
parse_keyword_case,
parse_keyword_command,
parse_keyword_else,
parse_keyword_end,
parse_keyword_exec, parse_keyword_exec,
parse_keyword_for,
LAST_KEYWORD = parse_keyword_exec parse_keyword_function,
parse_keyword_if,
parse_keyword_in,
parse_keyword_not,
parse_keyword_or,
parse_keyword_switch,
parse_keyword_while,
LAST_KEYWORD = parse_keyword_while
} __packed; } __packed;
/* Statement decorations. This matches the order of productions in decorated_statement */ /* Statement decorations. This matches the order of productions in decorated_statement */

View file

@ -236,23 +236,24 @@ static const struct
} }
keyword_map[] = keyword_map[] =
{ {
/* Note that these must be sorted (except for the first), so that we can do binary search */
KEYWORD_MAP(none), KEYWORD_MAP(none),
KEYWORD_MAP(if),
KEYWORD_MAP(else),
KEYWORD_MAP(for),
KEYWORD_MAP(in),
KEYWORD_MAP(while),
KEYWORD_MAP(begin),
KEYWORD_MAP(function),
KEYWORD_MAP(switch),
KEYWORD_MAP(case),
KEYWORD_MAP(end),
KEYWORD_MAP(and), KEYWORD_MAP(and),
KEYWORD_MAP(or), KEYWORD_MAP(begin),
KEYWORD_MAP(not),
KEYWORD_MAP(command),
KEYWORD_MAP(builtin), KEYWORD_MAP(builtin),
KEYWORD_MAP(exec) KEYWORD_MAP(case),
KEYWORD_MAP(command),
KEYWORD_MAP(else),
KEYWORD_MAP(end),
KEYWORD_MAP(exec),
KEYWORD_MAP(for),
KEYWORD_MAP(function),
KEYWORD_MAP(if),
KEYWORD_MAP(in),
KEYWORD_MAP(not),
KEYWORD_MAP(or),
KEYWORD_MAP(switch),
KEYWORD_MAP(while)
}; };
wcstring keyword_description(parse_keyword_t k) wcstring keyword_description(parse_keyword_t k)
@ -1187,11 +1188,23 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
parse_keyword_t result = parse_keyword_none; parse_keyword_t result = parse_keyword_none;
if (tok == TOK_STRING) if (tok == TOK_STRING)
{ {
for (size_t i=0; i < sizeof keyword_map / sizeof *keyword_map; i++) /* Binary search on keyword_map. Start at 1 since 0 is keyword_none */
size_t left = 1, right = sizeof keyword_map / sizeof *keyword_map;
while (left < right)
{ {
if (! wcscmp(keyword_map[i].name, tok_txt)) size_t mid = left + (right - left)/2;
int cmp = wcscmp(tok_txt, keyword_map[mid].name);
if (cmp < 0)
{ {
result = keyword_map[i].keyword; right = mid; // tok_txt was smaller than mid
}
else if (cmp > 0)
{
left = mid + 1; // tok_txt was larger than mid
}
else
{
result = keyword_map[mid].keyword; // found it
break; break;
} }
} }