Hack the tokenizer to compress multiple adjacent newlines into one

This slightly reduces the size of parse trees, and is otherwise a
minor optimization.
This commit is contained in:
ridiculousfish 2014-11-24 01:20:57 -08:00
parent 196a7c9d18
commit eafd577629
2 changed files with 12 additions and 4 deletions

View file

@@ -458,10 +458,10 @@ static void test_tok()
say(L"Test destruction of broken tokenizer"); say(L"Test destruction of broken tokenizer");
{ {
const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect"; const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect Compress_Newlines\n \n\t\n \nInto_Just_One";
const int types[] = const int types[] =
{ {
TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_END TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, TOK_STRING, TOK_END
}; };
say(L"Test correct tokenization"); say(L"Test correct tokenization");

View file

@@ -621,14 +621,22 @@ void tok_next(tokenizer_t *tok)
switch (*tok->buff) switch (*tok->buff)
{ {
case L'\0': case L'\0':
tok->last_type = TOK_END; tok->last_type = TOK_END;
/*fwprintf( stderr, L"End of string\n" );*/ /*fwprintf( stderr, L"End of string\n" );*/
tok->has_next = false; tok->has_next = false;
break; break;
case 13: case 13: // carriage return
case L'\n': case L'\n':
// Hack: when we get a newline, swallow as many as we can
// This compresses multiple subsequent newlines into a single one
while (*tok->buff == L'\n' || *tok->buff == 13 || *tok->buff == ' ' || *tok->buff == '\t')
{
tok->buff++;
}
tok->last_type = TOK_END;
break;
case L';': case L';':
tok->last_type = TOK_END; tok->last_type = TOK_END;
tok->buff++; tok->buff++;