Hack the tokenizer to compress multiple adjacent newlines into one

This slightly reduces the size of parse trees and is otherwise a minor optimization.
ridiculousfish 2014-11-24 01:20:57 -08:00
parent 196a7c9d18
commit eafd577629
2 changed files with 12 additions and 4 deletions
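
The heart of the change (shown in the second diff below) is a swallow loop added to tok_next(). As a hypothetical standalone sketch (skip_blank_run, the file name, and the demo walk are made up for illustration, not fish code), the idea is: once a newline or carriage return is seen, consume the whole run of newlines, carriage returns, spaces, and tabs, so a stretch of blank lines yields a single end-of-line token rather than one per line.

    // newline_compress_demo.cpp -- hypothetical, illustrative only
    #include <cstdio>

    // Return a pointer just past a run of newlines, carriage returns,
    // spaces, and tabs -- the same condition as the loop added to tok_next().
    static const wchar_t *skip_blank_run(const wchar_t *p)
    {
        while (*p == L'\n' || *p == L'\r' || *p == L' ' || *p == L'\t')
            p++;
        return p;
    }

    int main()
    {
        // Three blank lines (some holding only spaces/tabs) between two words.
        const wchar_t *str = L"Compress_Newlines\n \n\t\n \nInto_Just_One";
        int end_tokens = 0;
        for (const wchar_t *p = str; *p != L'\0';)
        {
            if (*p == L'\n' || *p == L'\r')
            {
                p = skip_blank_run(p); // the whole blank run is consumed at once
                end_tokens++;
            }
            else
            {
                p++; // inside an ordinary word; irrelevant to the demo
            }
        }
        printf("end-of-line tokens: %d\n", end_tokens); // prints 1, not 4
        return 0;
    }

Without the compression, this input's four newlines would each become an end-of-line token; with it, the parser sees a single separator, which is where the parse-tree savings come from.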

@@ -458,10 +458,10 @@ static void test_tok()
say(L"Test destruction of broken tokenizer");
{
const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect";
const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect Compress_Newlines\n \n\t\n \nInto_Just_One";
const int types[] =
{
TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END
TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, TOK_STRING, TOK_END
};
say(L"Test correct tokenization");

@@ -621,14 +621,22 @@ void tok_next(tokenizer_t *tok)
     switch (*tok->buff)
     {
         case L'\0':
             tok->last_type = TOK_END;
             /*fwprintf( stderr, L"End of string\n" );*/
             tok->has_next = false;
             break;
-        case 13:
+        case 13: // carriage return
         case L'\n':
+            // Hack: when we get a newline, swallow as many as we can
+            // This compresses multiple subsequent newlines into a single one
+            while (*tok->buff == L'\n' || *tok->buff == 13 || *tok->buff == ' ' || *tok->buff == '\t')
+            {
+                tok->buff++;
+            }
+            tok->last_type = TOK_END;
+            break;
         case L';':
             tok->last_type = TOK_END;
             tok->buff++;
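
Two properties of the swallow loop are worth calling out. First, it also consumes spaces and tabs, so blank lines containing only whitespace (like the " \n\t\n " stretch in the new test string) fold into the same single TOK_END. Second, it only triggers on newlines and carriage returns; the untouched case L';' branch still emits one TOK_END per semicolon. Tracing the new test substring through the new code (trace added here for illustration):

    // Input: L"Compress_Newlines\n \n\t\n \nInto_Just_One"
    // 1. "Compress_Newlines"  -> TOK_STRING
    // 2. '\n' enters the newline case; the while loop eats "\n \n\t\n \n"
    //                          -> a single TOK_END
    // 3. "Into_Just_One"      -> TOK_STRING
    // 4. '\0'                 -> TOK_END, and has_next becomes false

This matches the four token types appended to the expected-types array in the first diff.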