Fix for issue where last job_list in tree would have a -1 production_idx

because we never actually sent the terminal token type
This commit is contained in:
ridiculousfish 2013-12-26 14:52:15 -08:00
parent c1a13ae8bc
commit a6ca809a4e
3 changed files with 31 additions and 9 deletions

View file

@ -2212,6 +2212,12 @@ static bool increment(std::vector<parser_fuzz_token_t> &tokens)
if (! incremented_in_keyword) if (! incremented_in_keyword)
{ {
token.token_type++; token.token_type++;
// Skip the very special parse_token_type_terminate, since that's always the last thing delivered
if (token.token_type == parse_token_type_terminate)
{
token.token_type++;
}
if (token.token_type > LAST_TERMINAL_TYPE) if (token.token_type > LAST_TERMINAL_TYPE)
{ {
token.token_type = FIRST_TERMINAL_TYPE; token.token_type = FIRST_TERMINAL_TYPE;

View file

@ -56,6 +56,8 @@ enum parse_token_type_t
parse_token_type_redirection, parse_token_type_redirection,
parse_token_type_background, parse_token_type_background,
parse_token_type_end, parse_token_type_end,
// Special terminal type that means no more tokens forthcoming
parse_token_type_terminate, parse_token_type_terminate,
// Very special terminal types that don't appear in the production list // Very special terminal types that don't appear in the production list

View file

@ -224,6 +224,7 @@ wcstring keyword_description(parse_keyword_t k)
wcstring parse_node_t::describe(void) const wcstring parse_node_t::describe(void) const
{ {
wcstring result = token_type_description(type); wcstring result = token_type_description(type);
append_format(result, L" (prod %d)", this->production_idx);
return result; return result;
} }
@ -455,7 +456,7 @@ class parse_ll_t
} }
else else
{ {
// Generate the parse node. Note that this push_back may invalidate node. // Generate the parse node.
parse_token_type_t child_type = production_element_type(elem); parse_token_type_t child_type = production_element_type(elem);
parse_node_t child = parse_node_t(child_type); parse_node_t child = parse_node_t(child_type);
child.parent = parent_node_idx; child.parent = parent_node_idx;
@ -861,8 +862,10 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2)
} }
else else
{ {
// When a job_list encounters something like 'else', it returns an empty production to return control to the outer block. But if it's unbalanced, then we'll end up with an empty stack! So make sure that doesn't happen. This is the primary mechanism by which we detect e.g. unbalanced end. bool is_terminate = (token1.type == parse_token_type_terminate);
if (symbol_stack.size() == 1 && production_is_empty(production))
// When a job_list encounters something like 'else', it returns an empty production to return control to the outer block. But if it's unbalanced, then we'll end up with an empty stack! So make sure that doesn't happen. This is the primary mechanism by which we detect e.g. unbalanced end. However, if we get a true terminate token, then we allow (expect) this to empty the stack
if (symbol_stack.size() == 1 && production_is_empty(production) && ! is_terminate)
{ {
this->parse_error_unbalancing_token(token1); this->parse_error_unbalancing_token(token1);
break; break;
@ -872,8 +875,14 @@ void parse_ll_t::accept_tokens(parse_token_t token1, parse_token_t token2)
// Note that stack_elem is invalidated by popping the stack. // Note that stack_elem is invalidated by popping the stack.
symbol_stack_pop_push_production(production); symbol_stack_pop_push_production(production);
// Expect to not have an empty stack // Expect to not have an empty stack, unless this was the terminate type
assert(! symbol_stack.empty()); // Note we may not have an empty stack with the terminate type (i.e. incomplete input)
assert(is_terminate || ! symbol_stack.empty());
if (symbol_stack.empty())
{
break;
}
} }
} }
} }
@ -931,12 +940,15 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
/* Placeholder invalid token */ /* Placeholder invalid token */
static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, false, -1, -1}; static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, false, -1, -1};
/* Terminal token */
static const parse_token_t kTerminalToken = {parse_token_type_terminate, parse_keyword_none, false, -1, -1};
/* Return a new parse token, advancing the tokenizer */ /* Return a new parse token, advancing the tokenizer */
static inline parse_token_t next_parse_token(tokenizer_t *tok) static inline parse_token_t next_parse_token(tokenizer_t *tok)
{ {
if (! tok_has_next(tok)) if (! tok_has_next(tok))
{ {
return kInvalidToken; return kTerminalToken;
} }
token_type tok_type = static_cast<token_type>(tok_last_type(tok)); token_type tok_type = static_cast<token_type>(tok_last_type(tok));
@ -978,8 +990,8 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
/* We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our queue with an initial token at index 1. */ /* We are an LL(2) parser. We pass two tokens at a time. New tokens come in at index 1. Seed our queue with an initial token at index 1. */
parse_token_t queue[2] = {kInvalidToken, next_parse_token(&tok)}; parse_token_t queue[2] = {kInvalidToken, next_parse_token(&tok)};
/* Go until the most recently added token is invalid. Note this may mean we don't process anything if there were no tokens. */ /* Loop until we get a terminal token */
while (queue[1].type != token_type_invalid) do
{ {
/* Push a new token onto the queue */ /* Push a new token onto the queue */
queue[0] = queue[1]; queue[0] = queue[1];
@ -1010,7 +1022,9 @@ bool parse_t::parse_internal(const wcstring &str, parse_tree_flags_t parse_flags
break; break;
} }
} }
}
/* If this was the last token, then stop the loop */
} while (queue[0].type != parse_token_type_terminate);
// Teach each node where its source range is // Teach each node where its source range is