From fd2a0dffa9656d4f63f2074ffee2a4edfe3c60ec Mon Sep 17 00:00:00 2001
From: ridiculousfish
Date: Sun, 18 Feb 2018 14:37:44 -0800
Subject: [PATCH] Reflect newlines after pipes in fish grammar

The previous attempt to support newlines after pipes changed the lexer to
swallow newlines after encountering a pipe. This has two problems that are
difficult to fix:

1. Comments cannot be placed after the pipe.
2. fish_indent won't know about the newlines, so it will erase them.

Address these problems by removing the lexer behavior and replacing it with
a new parser symbol "optional_newlines", allowing the newlines to be
reflected directly in the fish grammar.
---
 CHANGELOG.md                   |  1 +
 src/fish_tests.cpp             | 12 +++++++++
 src/parse_constants.h          |  2 ++
 src/parse_execution.cpp        |  4 +--
 src/parse_grammar.h            |  9 ++++++-
 src/parse_grammar_elements.inc |  1 +
 src/parse_productions.cpp      | 45 ++++++++++------------------------
 src/parse_tree.cpp             | 15 +++++-------
 src/parse_tree.h               |  1 +
 src/tnode.cpp                  |  2 +-
 src/tokenizer.cpp              | 35 +++-----------------------
 src/tokenizer.h                |  3 ---
 12 files changed, 50 insertions(+), 80 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a997ddaf..4c7adf602 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,6 +38,7 @@ This section is for changes merged to the `major` branch that are not also merge
 - Slicing $history (in particular, `$history[1]` for the last executed command) is much faster.
 - The pager will now show the full command instead of just its last line if the number of completions is large (#4702).
 - Tildes in file names are now properly escaped in completions (#2274)
+- A pipe at the end of a line now allows the job to continue on the next line (#1285)
 
 ## Other significant changes
 - Command substitution output is now limited to 10 MB by default (#3822).
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 5252a2b3a..0d9ceb208 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -739,6 +739,18 @@ static void test_parser() {
         err(L"leading pipe not reported properly");
     }
 
+    if (parse_util_detect_errors(L"true | # comment") != PARSER_TEST_INCOMPLETE) {
+        err(L"comment after pipe not reported as incomplete");
+    }
+
+    if (parse_util_detect_errors(L"true | # comment \n false ")) {
+        err(L"comment and newline after pipe wrongly reported as error");
+    }
+
+    if (parse_util_detect_errors(L"true | ; false ") != PARSER_TEST_ERROR) {
+        err(L"semicolon after pipe not detected as error");
+    }
+
     if (detect_argument_errors(L"foo")) {
         err(L"simple argument reported as error");
     }
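
For illustration, the three new tests above correspond to the following fish
input (shown here for exposition; not part of the patch):

    true | # comment

is now reported as incomplete rather than as an error, since the parser is
waiting for a command on a following line;

    true | # comment
    false

parses successfully, because newlines (and comments) may now follow a pipe;
and

    true | ; false

remains a parse error, because only newline-style token ends may follow a
pipe, never semicolons.
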
diff --git a/src/parse_constants.h b/src/parse_constants.h
index ca6e739e4..b8e43fe0c 100644
--- a/src/parse_constants.h
+++ b/src/parse_constants.h
@@ -45,6 +45,7 @@ enum parse_token_type_t {
     symbol_argument,
     symbol_redirection,
     symbol_optional_background,
+    symbol_optional_newlines,
     symbol_end_command,
     // Terminal types.
     parse_token_type_string,
@@ -99,6 +100,7 @@ const enum_map<parse_token_type_t> token_enum_map[] = {
     {symbol_job, L"symbol_job"},
     {symbol_job_continuation, L"symbol_job_continuation"},
     {symbol_job_list, L"symbol_job_list"},
+    {symbol_optional_newlines, L"symbol_optional_newlines"},
     {symbol_optional_background, L"symbol_optional_background"},
     {symbol_plain_statement, L"symbol_plain_statement"},
     {symbol_redirection, L"symbol_redirection"},
diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp
index e1ff43dc6..428e088b9 100644
--- a/src/parse_execution.cpp
+++ b/src/parse_execution.cpp
@@ -1056,7 +1056,7 @@ parse_execution_result_t parse_execution_context_t::populate_job_from_job_node(
         if (result != parse_execution_success) {
             break;
         }
-        tnode_t<g::statement> statement = job_cont.require_get_child<g::statement, 1>();
+        tnode_t<g::statement> statement = job_cont.require_get_child<g::statement, 2>();
 
         // Handle the pipe, whose fd may not be the obvious stdout.
         int pipe_write_fd = fd_redirected_by_pipe(get_source(pipe));
@@ -1071,7 +1071,7 @@ parse_execution_result_t parse_execution_context_t::populate_job_from_job_node(
         result = this->populate_job_process(j, processes.back().get(), statement);
 
         // Get the next continuation.
-        job_cont = job_cont.require_get_child<g::job_continuation, 2>();
+        job_cont = job_cont.require_get_child<g::job_continuation, 3>();
         assert(job_cont);
     }
 
diff --git a/src/parse_grammar.h b/src/parse_grammar.h
index c6ea29dee..0526ef055 100644
--- a/src/parse_grammar.h
+++ b/src/parse_grammar.h
@@ -210,7 +210,7 @@ DEF_ALT(job_list) {
 DEF(job) produces_sequence<statement, job_continuation, optional_background>{BODY(job)};
 
 DEF_ALT(job_continuation) {
-    using piped = seq<tok_pipe, statement, job_continuation>;
+    using piped = seq<tok_pipe, optional_newlines, statement, job_continuation>;
     using empty = grammar::empty;
     ALT_BODY(job_continuation, piped, empty);
 };
@@ -343,6 +343,13 @@ DEF_ALT(optional_background) {
 
 DEF(end_command) produces_single<keyword<parse_keyword_end>>{BODY(end_command)};
 
+// Note optional_newlines only allows newline-style tok_end, not semicolons.
+DEF_ALT(optional_newlines) {
+    using empty = grammar::empty;
+    using newlines = seq<tok_end, optional_newlines>;
+    ALT_BODY(optional_newlines, empty, newlines);
+};
+
 // A freestanding_argument_list is equivalent to a normal argument list, except it may contain
 // TOK_END (newlines, and even semicolons, for historical reasons)
 DEF_ALT(freestanding_argument_list) {
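
In BNF-style notation (an informal paraphrase of the C++ template encoding
above), the changed productions read:

    job_continuation  ::= <pipe> optional_newlines statement job_continuation
                        | <empty>
    optional_newlines ::= <empty>
                        | <newline> optional_newlines

Because optional_newlines becomes child 1 of the piped production, statement
moves from child index 1 to 2 and the trailing job_continuation from 2 to 3;
the require_get_child index changes in parse_execution.cpp above track
exactly that shift.
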
diff --git a/src/parse_grammar_elements.inc b/src/parse_grammar_elements.inc
index 4142db65f..ff087eeb7 100644
--- a/src/parse_grammar_elements.inc
+++ b/src/parse_grammar_elements.inc
@@ -25,6 +25,7 @@ ELEM(arguments_or_redirections_list)
 ELEM(argument)
 ELEM(redirection)
 ELEM(optional_background)
+ELEM(optional_newlines)
 ELEM(end_command)
 ELEM(freestanding_argument_list)
 #undef ELEM
diff --git a/src/parse_productions.cpp b/src/parse_productions.cpp
index 28d371d51..e97b27208 100644
--- a/src/parse_productions.cpp
+++ b/src/parse_productions.cpp
@@ -309,6 +309,12 @@ RESOLVE(arguments_or_redirections_list) {
     }
 }
 
+RESOLVE(optional_newlines) {
+    UNUSED(token2);
+    if (token1.is_newline) return production_for<newlines>();
+    return production_for<empty>();
+}
+
 RESOLVE(optional_background) {
     UNUSED(token2);
 
@@ -324,10 +330,6 @@ RESOLVE(optional_background) {
     }
 }
 
-#define TEST(SYM) \
-    case (symbol_##SYM): \
-        resolver = SYM::resolve; \
-        break;
 
 const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,
                                                                     const parse_token_t &input1,
@@ -342,35 +344,14 @@ const production_element_t *parse_productions::production_for_token(parse_token_
                                             parse_node_tag_t *out_tag) = //!OCLINT(unused param)
         NULL;
     switch (node_type) {
-        TEST(job_list)
-        TEST(job)
-        TEST(statement)
-        TEST(job_continuation)
-        TEST(boolean_statement)
-        TEST(block_statement)
-        TEST(if_statement)
-        TEST(if_clause)
-        TEST(else_clause)
-        TEST(else_continuation)
-        TEST(switch_statement)
-        TEST(decorated_statement)
-        TEST(case_item_list)
-        TEST(case_item)
-        TEST(argument_list)
-        TEST(freestanding_argument_list)
-        TEST(block_header)
-        TEST(for_header)
-        TEST(while_header)
-        TEST(begin_header)
-        TEST(function_header)
-        TEST(plain_statement)
-        TEST(andor_job_list)
-        TEST(arguments_or_redirections_list)
-        TEST(argument)
-        TEST(redirection)
-        TEST(optional_background)
-        TEST(end_command)
+// Handle all of our grammar elements
+#define ELEM(SYM) \
+    case (symbol_##SYM): \
+        resolver = SYM::resolve; \
+        break;
+#include "parse_grammar_elements.inc"
+
         // Everything else is an error.
         case parse_token_type_string:
         case parse_token_type_pipe:
         case parse_token_type_redirection:
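
production_for_token() now derives its switch cases by expanding
parse_grammar_elements.inc, so a grammar symbol added to the .inc file
automatically gains a resolver case, and the hand-maintained TEST list can no
longer fall out of sync. For readers unfamiliar with this X-macro pattern,
here is a minimal self-contained sketch of the same technique (hypothetical
color_t example, not fish code):

    #include <cstdio>

    // One central list of elements. Each expansion site defines ELEM,
    // expands the list, then undefines it, just as the includes of
    // parse_grammar_elements.inc do above.
    #define COLOR_LIST \
        ELEM(red)      \
        ELEM(green)    \
        ELEM(blue)

    // Expansion 1: build an enum from the list.
    #define ELEM(NAME) color_##NAME,
    enum color_t { COLOR_LIST };
    #undef ELEM

    // Expansion 2: build a switch over the same list.
    static const char *color_name(color_t c) {
    #define ELEM(NAME) \
        case color_##NAME: \
            return #NAME;
        switch (c) { COLOR_LIST }
    #undef ELEM
        return "unknown";
    }

    int main() {
        std::printf("%s\n", color_name(color_green));  // prints "green"
        return 0;
    }
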
diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp
index b06bb54c2..ec5d31d4e 100644
--- a/src/parse_tree.cpp
+++ b/src/parse_tree.cpp
@@ -256,9 +256,6 @@ static inline parse_token_type_t parse_token_type_from_tokenizer_token(
     return result;
 }
 
-#if 1
-// Disabled for the 2.2.0 release: https://github.com/fish-shell/fish-shell/issues/1809.
-
 /// Helper function for parse_dump_tree().
 static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src,
                                 node_offset_t node_idx, size_t indent, wcstring *result,
@@ -284,7 +281,6 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
 
     append_format(*result, L"%2lu - %l2u  ", *line, node_idx);
     result->append(indent * spacesPerIndent, L' ');
-    ;
     result->append(node.describe());
     if (node.child_count > 0) {
         append_format(*result, L" <%lu children>", node.child_count);
@@ -335,7 +331,6 @@ wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src) {
     }
     return result;
 }
-#endif
 
 /// Struct representing elements of the symbol stack, used in the internal state of the LL parser.
 struct parse_stack_element_t {
@@ -1046,12 +1041,12 @@ static parse_keyword_t keyword_for_token(token_type tok, const wcstring &token)
 }
 
 /// Placeholder invalid token.
-static const parse_token_t kInvalidToken = {
-    token_type_invalid, parse_keyword_none, false, false, SOURCE_OFFSET_INVALID, 0};
+static constexpr parse_token_t kInvalidToken = {
+    token_type_invalid, parse_keyword_none, false, false, false, SOURCE_OFFSET_INVALID, 0};
 
 /// Terminal token.
-static const parse_token_t kTerminalToken = {
-    parse_token_type_terminate, parse_keyword_none, false, false, SOURCE_OFFSET_INVALID, 0};
+static constexpr parse_token_t kTerminalToken = {
+    parse_token_type_terminate, parse_keyword_none, false, false, false, SOURCE_OFFSET_INVALID, 0};
 
 static inline bool is_help_argument(const wcstring &txt) {
     return txt == L"-h" || txt == L"--help";
@@ -1074,6 +1069,7 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok, tok_t *token) {
     result.keyword = keyword_for_token(token->type, token->text);
     result.has_dash_prefix = !token->text.empty() && token->text.at(0) == L'-';
     result.is_help_argument = result.has_dash_prefix && is_help_argument(token->text);
+    result.is_newline = (result.type == parse_token_type_end && token->text == L"\n");
 
     // These assertions are totally bogus. Basically our tokenizer works in size_t but we work in
     // uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably just
@@ -1157,6 +1153,7 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
                                   parse_keyword_none,
                                   false,
                                   false,
+                                  false,
                                   queue[error_token_idx].source_start,
                                   queue[error_token_idx].source_length};
         parser.accept_tokens(token, kInvalidToken);
diff --git a/src/parse_tree.h b/src/parse_tree.h
index 7c5a1028b..ac46d6500 100644
--- a/src/parse_tree.h
+++ b/src/parse_tree.h
@@ -31,6 +31,7 @@ struct parse_token_t {
     enum parse_keyword_t keyword;  // Any keyword represented by this token
     bool has_dash_prefix;          // Hackish: whether the source contains a dash prefix
     bool is_help_argument;         // Hackish: whether the source looks like '-h' or '--help'
+    bool is_newline;               // Hackish: if TOK_END, whether the source is a newline.
     source_offset_t source_start;
     source_offset_t source_length;
 
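
The is_newline flag added here is what RESOLVE(optional_newlines) consults,
and it is only computable because the tokenizer change below starts recording
the text of TOK_END tokens. A minimal sketch of the classification
(illustrative helper, not fish code):

    #include <string>

    // A TOK_END token now carries its source character: "\n", "\r", or ";".
    // Only a literal newline counts as newline-style, which is why
    // "true | ; false" still fails to parse while a newline after the
    // pipe succeeds.
    static bool end_is_newline(const std::wstring &end_token_text) {
        return end_token_text == L"\n";
    }
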
diff --git a/src/tnode.cpp b/src/tnode.cpp
index 419d985cc..3df585d3b 100644
--- a/src/tnode.cpp
+++ b/src/tnode.cpp
@@ -121,7 +121,7 @@ bool statement_is_in_pipeline(tnode_t<grammar::statement> st, bool include_first
     // has a non-empty continuation.
     if (include_first) {
         tnode_t<job_continuation> jc = st.try_get_parent<job>().child<1>();
-        if (jc.try_get_child<statement, 1>()) {
+        if (jc.try_get_child<statement, 2>()) {
             return true;
         }
     }
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index bdbd4d2a8..dccbd8191 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -102,16 +102,7 @@ bool tokenizer_t::next(struct tok_t *result) {
     }
     assert(this->buff >= this->orig_buff);
 
-    if (this->last_type == TOK_PIPE) {
-        // Ignore subsequent whitespaces or a newline after a pipe (#1285).
-        int pipe_pos = current_pos - 1;
-        while (this->orig_buff[pipe_pos] != L'|') {
-            pipe_pos--;
-        }
-        result->length = pipe_pos - this->last_pos + 1;
-    } else {
-        result->length = current_pos >= this->last_pos ? current_pos - this->last_pos : 0;
-    }
+    result->length = current_pos >= this->last_pos ? current_pos - this->last_pos : 0;
 
     this->tok_next();
     return true;
@@ -538,12 +529,14 @@ void tokenizer_t::tok_next() {
             this->last_type = TOK_END;
             // fwprintf( stderr, L"End of string\n" );
             this->has_next = false;
+            this->last_token.clear();
             break;
         }
         case L'\r':  // carriage-return
         case L'\n':  // newline
         case L';': {
             this->last_type = TOK_END;
+            this->last_token.assign(1, *this->buff);
             this->buff++;
             // Hack: when we get a newline, swallow as many as we can. This compresses multiple
             // subsequent newlines into a single one.
@@ -553,7 +546,6 @@ void tokenizer_t::tok_next() {
                     this->buff++;
                 }
             }
-            this->last_token.clear();
             break;
         }
         case L'&': {
@@ -565,7 +557,6 @@ void tokenizer_t::tok_next() {
             this->last_token = L"1";
             this->last_type = TOK_PIPE;
             this->buff++;
-            skip_newline_after_pipe();
             break;
         }
         case L'>':
@@ -580,9 +571,6 @@ void tokenizer_t::tok_next() {
                 TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR, this->buff);
             } else {
                 this->buff += consumed;
-                if (mode == TOK_PIPE) {
-                    skip_newline_after_pipe();
-                }
                 this->last_type = mode;
                 this->last_token = to_string(fd);
             }
@@ -606,9 +594,6 @@ void tokenizer_t::tok_next() {
                 TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR, error_location);
             } else {
                 this->buff += consumed;
-                if (mode == TOK_PIPE) {
-                    skip_newline_after_pipe();
-                }
                 this->last_type = mode;
                 this->last_token = to_string(fd);
             }
@@ -621,20 +606,6 @@ void tokenizer_t::tok_next() {
         }
     }
 }
 
-/// If the line ends with pipe, continue to the next line (#1285).
-void tokenizer_t::skip_newline_after_pipe() {
-    while (1) {
-        if (this->buff[0] == L'\n') {
-            this->buff++;
-            break;
-        } else if (my_iswspace(this->buff[0])) {
-            this->buff++;
-        } else {
-            break;
-        }
-    }
-}
-
 wcstring tok_first(const wcstring &str) {
     wcstring result;
     tokenizer_t t(str.c_str(), TOK_SQUASH_ERRORS);
diff --git a/src/tokenizer.h b/src/tokenizer.h
index aaada6e7e..c87a696b2 100644
--- a/src/tokenizer.h
+++ b/src/tokenizer.h
@@ -107,9 +107,6 @@ class tokenizer_t {
     void read_comment();
     void tok_next();
 
-    /// Skip whitespaces and a newline after a pipe at the end of the line.
-    void skip_newline_after_pipe();
-
    public:
     /// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and
     /// should not be freed by the caller until after the tokenizer is destroyed.
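
With these pieces in place, input such as "true |" followed by "false" on the
next line yields a parse tree in which the newline is an ordinary node rather
than something the lexer silently discarded; that is what lets fish_indent
preserve the user's layout. An informal sketch of the resulting tree, per the
productions above:

    job
    +- statement                "true"
    +- job_continuation (piped)
    |  +- <pipe>                "|"
    |  +- optional_newlines
    |  |  +- <newline>          "\n"
    |  |  +- optional_newlines  (empty)
    |  +- statement             "false"
    |  +- job_continuation      (empty)
    +- optional_background      (empty)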