mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-28 04:35:09 +00:00
Reflect newlines after pipes in fish grammar
The previous attempt to support newlines after pipes changed the lexer to swallow newlines after encountering a pipe. This has two problems that are difficult to fix: 1. comments cannot be placed after the pipe 2. fish_indent won't know about the newlines, so it will erase them Address these problems by removing the lexer behavior, and replacing it with a new parser symbol "optional_newlines" allowing the newlines to be reflected directly in the fish grammar.
This commit is contained in:
parent
ea4e997dc9
commit
fd2a0dffa9
12 changed files with 50 additions and 80 deletions
|
@ -38,6 +38,7 @@ This section is for changes merged to the `major` branch that are not also merge
|
||||||
- Slicing $history (in particular, `$history[1]` for the last executed command) is much faster.
|
- Slicing $history (in particular, `$history[1]` for the last executed command) is much faster.
|
||||||
- The pager will now show the full command instead of just its last line if the number of completions is large (#4702).
|
- The pager will now show the full command instead of just its last line if the number of completions is large (#4702).
|
||||||
- Tildes in file names are now properly escaped in completions (#2274)
|
- Tildes in file names are now properly escaped in completions (#2274)
|
||||||
|
- A pipe at the end of a line now allows the job to continue on the next line (#1285)
|
||||||
|
|
||||||
## Other significant changes
|
## Other significant changes
|
||||||
- Command substitution output is now limited to 10 MB by default (#3822).
|
- Command substitution output is now limited to 10 MB by default (#3822).
|
||||||
|
|
|
@ -739,6 +739,18 @@ static void test_parser() {
|
||||||
err(L"leading pipe not reported properly");
|
err(L"leading pipe not reported properly");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (parse_util_detect_errors(L"true | # comment") != PARSER_TEST_INCOMPLETE) {
|
||||||
|
err(L"comment after pipe not reported as incomplete");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parse_util_detect_errors(L"true | # comment \n false ")) {
|
||||||
|
err(L"comment and newline after pipe wrongly reported as error");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parse_util_detect_errors(L"true | ; false ") != PARSER_TEST_ERROR) {
|
||||||
|
err(L"semicolon after pipe not detected as error");
|
||||||
|
}
|
||||||
|
|
||||||
if (detect_argument_errors(L"foo")) {
|
if (detect_argument_errors(L"foo")) {
|
||||||
err(L"simple argument reported as error");
|
err(L"simple argument reported as error");
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,6 +45,7 @@ enum parse_token_type_t {
|
||||||
symbol_argument,
|
symbol_argument,
|
||||||
symbol_redirection,
|
symbol_redirection,
|
||||||
symbol_optional_background,
|
symbol_optional_background,
|
||||||
|
symbol_optional_newlines,
|
||||||
symbol_end_command,
|
symbol_end_command,
|
||||||
// Terminal types.
|
// Terminal types.
|
||||||
parse_token_type_string,
|
parse_token_type_string,
|
||||||
|
@ -99,6 +100,7 @@ const enum_map<parse_token_type_t> token_enum_map[] = {
|
||||||
{symbol_job, L"symbol_job"},
|
{symbol_job, L"symbol_job"},
|
||||||
{symbol_job_continuation, L"symbol_job_continuation"},
|
{symbol_job_continuation, L"symbol_job_continuation"},
|
||||||
{symbol_job_list, L"symbol_job_list"},
|
{symbol_job_list, L"symbol_job_list"},
|
||||||
|
{symbol_optional_newlines, L"symbol_optional_newlines"},
|
||||||
{symbol_optional_background, L"symbol_optional_background"},
|
{symbol_optional_background, L"symbol_optional_background"},
|
||||||
{symbol_plain_statement, L"symbol_plain_statement"},
|
{symbol_plain_statement, L"symbol_plain_statement"},
|
||||||
{symbol_redirection, L"symbol_redirection"},
|
{symbol_redirection, L"symbol_redirection"},
|
||||||
|
|
|
@ -1056,7 +1056,7 @@ parse_execution_result_t parse_execution_context_t::populate_job_from_job_node(
|
||||||
if (result != parse_execution_success) {
|
if (result != parse_execution_success) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
tnode_t<g::statement> statement = job_cont.require_get_child<g::statement, 1>();
|
tnode_t<g::statement> statement = job_cont.require_get_child<g::statement, 2>();
|
||||||
|
|
||||||
// Handle the pipe, whose fd may not be the obvious stdout.
|
// Handle the pipe, whose fd may not be the obvious stdout.
|
||||||
int pipe_write_fd = fd_redirected_by_pipe(get_source(pipe));
|
int pipe_write_fd = fd_redirected_by_pipe(get_source(pipe));
|
||||||
|
@ -1071,7 +1071,7 @@ parse_execution_result_t parse_execution_context_t::populate_job_from_job_node(
|
||||||
result = this->populate_job_process(j, processes.back().get(), statement);
|
result = this->populate_job_process(j, processes.back().get(), statement);
|
||||||
|
|
||||||
// Get the next continuation.
|
// Get the next continuation.
|
||||||
job_cont = job_cont.require_get_child<g::job_continuation, 2>();
|
job_cont = job_cont.require_get_child<g::job_continuation, 3>();
|
||||||
assert(job_cont);
|
assert(job_cont);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -210,7 +210,7 @@ DEF_ALT(job_list) {
|
||||||
DEF(job) produces_sequence<statement, job_continuation, optional_background>{BODY(job)};
|
DEF(job) produces_sequence<statement, job_continuation, optional_background>{BODY(job)};
|
||||||
|
|
||||||
DEF_ALT(job_continuation) {
|
DEF_ALT(job_continuation) {
|
||||||
using piped = seq<tok_pipe, statement, job_continuation>;
|
using piped = seq<tok_pipe, optional_newlines, statement, job_continuation>;
|
||||||
using empty = grammar::empty;
|
using empty = grammar::empty;
|
||||||
ALT_BODY(job_continuation, piped, empty);
|
ALT_BODY(job_continuation, piped, empty);
|
||||||
};
|
};
|
||||||
|
@ -343,6 +343,13 @@ DEF_ALT(optional_background) {
|
||||||
|
|
||||||
DEF(end_command) produces_single<keyword<parse_keyword_end>>{BODY(end_command)};
|
DEF(end_command) produces_single<keyword<parse_keyword_end>>{BODY(end_command)};
|
||||||
|
|
||||||
|
// Note optional_newlines only allows newline-style tok_end, not semicolons.
|
||||||
|
DEF_ALT(optional_newlines) {
|
||||||
|
using empty = grammar::empty;
|
||||||
|
using newlines = seq<tok_end, optional_newlines>;
|
||||||
|
ALT_BODY(optional_newlines, empty, newlines);
|
||||||
|
};
|
||||||
|
|
||||||
// A freestanding_argument_list is equivalent to a normal argument list, except it may contain
|
// A freestanding_argument_list is equivalent to a normal argument list, except it may contain
|
||||||
// TOK_END (newlines, and even semicolons, for historical reasons)
|
// TOK_END (newlines, and even semicolons, for historical reasons)
|
||||||
DEF_ALT(freestanding_argument_list) {
|
DEF_ALT(freestanding_argument_list) {
|
||||||
|
|
|
@ -25,6 +25,7 @@ ELEM(arguments_or_redirections_list)
|
||||||
ELEM(argument)
|
ELEM(argument)
|
||||||
ELEM(redirection)
|
ELEM(redirection)
|
||||||
ELEM(optional_background)
|
ELEM(optional_background)
|
||||||
|
ELEM(optional_newlines)
|
||||||
ELEM(end_command)
|
ELEM(end_command)
|
||||||
ELEM(freestanding_argument_list)
|
ELEM(freestanding_argument_list)
|
||||||
#undef ELEM
|
#undef ELEM
|
||||||
|
|
|
@ -309,6 +309,12 @@ RESOLVE(arguments_or_redirections_list) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RESOLVE(optional_newlines) {
|
||||||
|
UNUSED(token2);
|
||||||
|
if (token1.is_newline) return production_for<newlines>();
|
||||||
|
return production_for<empty>();
|
||||||
|
}
|
||||||
|
|
||||||
RESOLVE(optional_background) {
|
RESOLVE(optional_background) {
|
||||||
UNUSED(token2);
|
UNUSED(token2);
|
||||||
|
|
||||||
|
@ -324,10 +330,6 @@ RESOLVE(optional_background) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define TEST(SYM) \
|
|
||||||
case (symbol_##SYM): \
|
|
||||||
resolver = SYM::resolve; \
|
|
||||||
break;
|
|
||||||
|
|
||||||
const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,
|
const production_element_t *parse_productions::production_for_token(parse_token_type_t node_type,
|
||||||
const parse_token_t &input1,
|
const parse_token_t &input1,
|
||||||
|
@ -342,35 +344,14 @@ const production_element_t *parse_productions::production_for_token(parse_token_
|
||||||
parse_node_tag_t *out_tag) = //!OCLINT(unused param)
|
parse_node_tag_t *out_tag) = //!OCLINT(unused param)
|
||||||
NULL;
|
NULL;
|
||||||
switch (node_type) {
|
switch (node_type) {
|
||||||
TEST(job_list)
|
// Handle all of our grammar elements
|
||||||
TEST(job)
|
#define ELEM(SYM) \
|
||||||
TEST(statement)
|
case (symbol_##SYM): \
|
||||||
TEST(job_continuation)
|
resolver = SYM::resolve; \
|
||||||
TEST(boolean_statement)
|
break;
|
||||||
TEST(block_statement)
|
#include "parse_grammar_elements.inc"
|
||||||
TEST(if_statement)
|
|
||||||
TEST(if_clause)
|
|
||||||
TEST(else_clause)
|
|
||||||
TEST(else_continuation)
|
|
||||||
TEST(switch_statement)
|
|
||||||
TEST(decorated_statement)
|
|
||||||
TEST(case_item_list)
|
|
||||||
TEST(case_item)
|
|
||||||
TEST(argument_list)
|
|
||||||
TEST(freestanding_argument_list)
|
|
||||||
TEST(block_header)
|
|
||||||
TEST(for_header)
|
|
||||||
TEST(while_header)
|
|
||||||
TEST(begin_header)
|
|
||||||
TEST(function_header)
|
|
||||||
TEST(plain_statement)
|
|
||||||
TEST(andor_job_list)
|
|
||||||
TEST(arguments_or_redirections_list)
|
|
||||||
TEST(argument)
|
|
||||||
TEST(redirection)
|
|
||||||
TEST(optional_background)
|
|
||||||
TEST(end_command)
|
|
||||||
|
|
||||||
|
// Everything else is an error.
|
||||||
case parse_token_type_string:
|
case parse_token_type_string:
|
||||||
case parse_token_type_pipe:
|
case parse_token_type_pipe:
|
||||||
case parse_token_type_redirection:
|
case parse_token_type_redirection:
|
||||||
|
|
|
@ -256,9 +256,6 @@ static inline parse_token_type_t parse_token_type_from_tokenizer_token(
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 1
|
|
||||||
// Disabled for the 2.2.0 release: https://github.com/fish-shell/fish-shell/issues/1809.
|
|
||||||
|
|
||||||
/// Helper function for parse_dump_tree().
|
/// Helper function for parse_dump_tree().
|
||||||
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src,
|
static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &src,
|
||||||
node_offset_t node_idx, size_t indent, wcstring *result,
|
node_offset_t node_idx, size_t indent, wcstring *result,
|
||||||
|
@ -284,7 +281,6 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
|
||||||
|
|
||||||
append_format(*result, L"%2lu - %l2u ", *line, node_idx);
|
append_format(*result, L"%2lu - %l2u ", *line, node_idx);
|
||||||
result->append(indent * spacesPerIndent, L' ');
|
result->append(indent * spacesPerIndent, L' ');
|
||||||
;
|
|
||||||
result->append(node.describe());
|
result->append(node.describe());
|
||||||
if (node.child_count > 0) {
|
if (node.child_count > 0) {
|
||||||
append_format(*result, L" <%lu children>", node.child_count);
|
append_format(*result, L" <%lu children>", node.child_count);
|
||||||
|
@ -335,7 +331,6 @@ wcstring parse_dump_tree(const parse_node_tree_t &nodes, const wcstring &src) {
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
/// Struct representing elements of the symbol stack, used in the internal state of the LL parser.
|
/// Struct representing elements of the symbol stack, used in the internal state of the LL parser.
|
||||||
struct parse_stack_element_t {
|
struct parse_stack_element_t {
|
||||||
|
@ -1046,12 +1041,12 @@ static parse_keyword_t keyword_for_token(token_type tok, const wcstring &token)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Placeholder invalid token.
|
/// Placeholder invalid token.
|
||||||
static const parse_token_t kInvalidToken = {
|
static constexpr parse_token_t kInvalidToken = {
|
||||||
token_type_invalid, parse_keyword_none, false, false, SOURCE_OFFSET_INVALID, 0};
|
token_type_invalid, parse_keyword_none, false, false, false, SOURCE_OFFSET_INVALID, 0};
|
||||||
|
|
||||||
/// Terminal token.
|
/// Terminal token.
|
||||||
static const parse_token_t kTerminalToken = {
|
static constexpr parse_token_t kTerminalToken = {
|
||||||
parse_token_type_terminate, parse_keyword_none, false, false, SOURCE_OFFSET_INVALID, 0};
|
parse_token_type_terminate, parse_keyword_none, false, false, false, SOURCE_OFFSET_INVALID, 0};
|
||||||
|
|
||||||
static inline bool is_help_argument(const wcstring &txt) {
|
static inline bool is_help_argument(const wcstring &txt) {
|
||||||
return txt == L"-h" || txt == L"--help";
|
return txt == L"-h" || txt == L"--help";
|
||||||
|
@ -1074,6 +1069,7 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok, tok_t *token) {
|
||||||
result.keyword = keyword_for_token(token->type, token->text);
|
result.keyword = keyword_for_token(token->type, token->text);
|
||||||
result.has_dash_prefix = !token->text.empty() && token->text.at(0) == L'-';
|
result.has_dash_prefix = !token->text.empty() && token->text.at(0) == L'-';
|
||||||
result.is_help_argument = result.has_dash_prefix && is_help_argument(token->text);
|
result.is_help_argument = result.has_dash_prefix && is_help_argument(token->text);
|
||||||
|
result.is_newline = (result.type == parse_token_type_end && token->text == L"\n");
|
||||||
|
|
||||||
// These assertions are totally bogus. Basically our tokenizer works in size_t but we work in
|
// These assertions are totally bogus. Basically our tokenizer works in size_t but we work in
|
||||||
// uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably just
|
// uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably just
|
||||||
|
@ -1157,6 +1153,7 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
|
||||||
parse_keyword_none,
|
parse_keyword_none,
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
|
false,
|
||||||
queue[error_token_idx].source_start,
|
queue[error_token_idx].source_start,
|
||||||
queue[error_token_idx].source_length};
|
queue[error_token_idx].source_length};
|
||||||
parser.accept_tokens(token, kInvalidToken);
|
parser.accept_tokens(token, kInvalidToken);
|
||||||
|
|
|
@ -31,6 +31,7 @@ struct parse_token_t {
|
||||||
enum parse_keyword_t keyword; // Any keyword represented by this token
|
enum parse_keyword_t keyword; // Any keyword represented by this token
|
||||||
bool has_dash_prefix; // Hackish: whether the source contains a dash prefix
|
bool has_dash_prefix; // Hackish: whether the source contains a dash prefix
|
||||||
bool is_help_argument; // Hackish: whether the source looks like '-h' or '--help'
|
bool is_help_argument; // Hackish: whether the source looks like '-h' or '--help'
|
||||||
|
bool is_newline; // Hackish: if TOK_END, whether the source is a newline.
|
||||||
source_offset_t source_start;
|
source_offset_t source_start;
|
||||||
source_offset_t source_length;
|
source_offset_t source_length;
|
||||||
|
|
||||||
|
|
|
@ -121,7 +121,7 @@ bool statement_is_in_pipeline(tnode_t<grammar::statement> st, bool include_first
|
||||||
// has a non-empty continuation.
|
// has a non-empty continuation.
|
||||||
if (include_first) {
|
if (include_first) {
|
||||||
tnode_t<job_continuation> jc = st.try_get_parent<job>().child<1>();
|
tnode_t<job_continuation> jc = st.try_get_parent<job>().child<1>();
|
||||||
if (jc.try_get_child<statement, 1>()) {
|
if (jc.try_get_child<statement, 2>()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -102,16 +102,7 @@ bool tokenizer_t::next(struct tok_t *result) {
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(this->buff >= this->orig_buff);
|
assert(this->buff >= this->orig_buff);
|
||||||
if (this->last_type == TOK_PIPE) {
|
result->length = current_pos >= this->last_pos ? current_pos - this->last_pos : 0;
|
||||||
// Ignore subsequent whitespaces or a newline after a pipe (#1285).
|
|
||||||
int pipe_pos = current_pos - 1;
|
|
||||||
while (this->orig_buff[pipe_pos] != L'|') {
|
|
||||||
pipe_pos--;
|
|
||||||
}
|
|
||||||
result->length = pipe_pos - this->last_pos + 1;
|
|
||||||
} else {
|
|
||||||
result->length = current_pos >= this->last_pos ? current_pos - this->last_pos : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
this->tok_next();
|
this->tok_next();
|
||||||
return true;
|
return true;
|
||||||
|
@ -538,12 +529,14 @@ void tokenizer_t::tok_next() {
|
||||||
this->last_type = TOK_END;
|
this->last_type = TOK_END;
|
||||||
// fwprintf( stderr, L"End of string\n" );
|
// fwprintf( stderr, L"End of string\n" );
|
||||||
this->has_next = false;
|
this->has_next = false;
|
||||||
|
this->last_token.clear();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case L'\r': // carriage-return
|
case L'\r': // carriage-return
|
||||||
case L'\n': // newline
|
case L'\n': // newline
|
||||||
case L';': {
|
case L';': {
|
||||||
this->last_type = TOK_END;
|
this->last_type = TOK_END;
|
||||||
|
this->last_token.assign(1, *this->buff);
|
||||||
this->buff++;
|
this->buff++;
|
||||||
// Hack: when we get a newline, swallow as many as we can. This compresses multiple
|
// Hack: when we get a newline, swallow as many as we can. This compresses multiple
|
||||||
// subsequent newlines into a single one.
|
// subsequent newlines into a single one.
|
||||||
|
@ -553,7 +546,6 @@ void tokenizer_t::tok_next() {
|
||||||
this->buff++;
|
this->buff++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this->last_token.clear();
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case L'&': {
|
case L'&': {
|
||||||
|
@ -565,7 +557,6 @@ void tokenizer_t::tok_next() {
|
||||||
this->last_token = L"1";
|
this->last_token = L"1";
|
||||||
this->last_type = TOK_PIPE;
|
this->last_type = TOK_PIPE;
|
||||||
this->buff++;
|
this->buff++;
|
||||||
skip_newline_after_pipe();
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case L'>':
|
case L'>':
|
||||||
|
@ -580,9 +571,6 @@ void tokenizer_t::tok_next() {
|
||||||
TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR, this->buff);
|
TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR, this->buff);
|
||||||
} else {
|
} else {
|
||||||
this->buff += consumed;
|
this->buff += consumed;
|
||||||
if (mode == TOK_PIPE) {
|
|
||||||
skip_newline_after_pipe();
|
|
||||||
}
|
|
||||||
this->last_type = mode;
|
this->last_type = mode;
|
||||||
this->last_token = to_string(fd);
|
this->last_token = to_string(fd);
|
||||||
}
|
}
|
||||||
|
@ -606,9 +594,6 @@ void tokenizer_t::tok_next() {
|
||||||
TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR, error_location);
|
TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR, error_location);
|
||||||
} else {
|
} else {
|
||||||
this->buff += consumed;
|
this->buff += consumed;
|
||||||
if (mode == TOK_PIPE) {
|
|
||||||
skip_newline_after_pipe();
|
|
||||||
}
|
|
||||||
this->last_type = mode;
|
this->last_type = mode;
|
||||||
this->last_token = to_string(fd);
|
this->last_token = to_string(fd);
|
||||||
}
|
}
|
||||||
|
@ -621,20 +606,6 @@ void tokenizer_t::tok_next() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If the line ends with pipe, continue to the next line (#1285).
|
|
||||||
void tokenizer_t::skip_newline_after_pipe() {
|
|
||||||
while (1) {
|
|
||||||
if (this->buff[0] == L'\n') {
|
|
||||||
this->buff++;
|
|
||||||
break;
|
|
||||||
} else if (my_iswspace(this->buff[0])) {
|
|
||||||
this->buff++;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
wcstring tok_first(const wcstring &str) {
|
wcstring tok_first(const wcstring &str) {
|
||||||
wcstring result;
|
wcstring result;
|
||||||
tokenizer_t t(str.c_str(), TOK_SQUASH_ERRORS);
|
tokenizer_t t(str.c_str(), TOK_SQUASH_ERRORS);
|
||||||
|
|
|
@ -107,9 +107,6 @@ class tokenizer_t {
|
||||||
void read_comment();
|
void read_comment();
|
||||||
void tok_next();
|
void tok_next();
|
||||||
|
|
||||||
/// Skip whitespaces and a newline after a pipe at the end of the line.
|
|
||||||
void skip_newline_after_pipe();
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and
|
/// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and
|
||||||
/// should not be freed by the caller until after the tokenizer is destroyed.
|
/// should not be freed by the caller until after the tokenizer is destroyed.
|
||||||
|
|
Loading…
Reference in a new issue