Clean up and unify pipes and redirections

This cleans up how pipes and redirections are recognized by the parser,
and unifies pipes and redirections into a single type.
This commit is contained in:
ridiculousfish 2019-10-14 13:20:31 -07:00
parent 2d2e15b63d
commit 35671dd9f0
7 changed files with 280 additions and 183 deletions

View file

@ -669,35 +669,60 @@ static void test_tokenizer() {
do_test(token->error_offset == 4); do_test(token->error_offset == 4);
} }
// Test redirection_type_for_string. // Test some redirection parsing.
if (redirection_type_for_string(L"<") != redirection_type_t::input) auto pipe_or_redir = [](const wchar_t *s) { return pipe_or_redir_t::from_string(s); };
do_test(pipe_or_redir(L"|")->is_pipe);
do_test(pipe_or_redir(L"0>|")->is_pipe);
do_test(pipe_or_redir(L"0>|")->fd == 0);
do_test(pipe_or_redir(L"2>|")->is_pipe);
do_test(pipe_or_redir(L"2>|")->fd == 2);
do_test(pipe_or_redir(L">|")->is_pipe);
do_test(pipe_or_redir(L">|")->fd == STDOUT_FILENO);
do_test(!pipe_or_redir(L">")->is_pipe);
do_test(pipe_or_redir(L">")->fd == STDOUT_FILENO);
do_test(pipe_or_redir(L"2>")->fd == STDERR_FILENO);
do_test(pipe_or_redir(L"9999999999999>")->fd == -1);
do_test(pipe_or_redir(L"9999999999999>&2")->fd == -1);
do_test(pipe_or_redir(L"9999999999999>&2")->is_valid() == false);
do_test(pipe_or_redir(L"9999999999999>&2")->is_valid() == false);
auto get_redir_mode = [](const wchar_t *s) -> maybe_t<redirection_mode_t> {
if (auto redir = pipe_or_redir_t::from_string(s)) {
return redir->mode;
}
return none();
};
if (get_redir_mode(L"<") != redirection_mode_t::input)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"^") != redirection_type_t::overwrite) if (get_redir_mode(L"^") != redirection_mode_t::overwrite)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L">") != redirection_type_t::overwrite) if (get_redir_mode(L">") != redirection_mode_t::overwrite)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>") != redirection_type_t::overwrite) if (get_redir_mode(L"2>") != redirection_mode_t::overwrite)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L">>") != redirection_type_t::append) if (get_redir_mode(L">>") != redirection_mode_t::append)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>>") != redirection_type_t::append) if (get_redir_mode(L"2>>") != redirection_mode_t::append)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>?") != redirection_type_t::noclob) if (get_redir_mode(L"2>?") != redirection_mode_t::noclob)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"9999999999999999>?")) if (get_redir_mode(L"9999999999999999>?") != redirection_mode_t::noclob)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>&3") != redirection_type_t::fd) if (get_redir_mode(L"2>&3") != redirection_mode_t::fd)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>|")) if (get_redir_mode(L"3<&0") != redirection_mode_t::fd)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (get_redir_mode(L"3</tmp/filetxt") != redirection_mode_t::input)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
// Test ^ with our feature flag on and off. // Test ^ with our feature flag on and off.
auto saved_flags = fish_features(); auto saved_flags = fish_features();
mutable_fish_features().set(features_t::stderr_nocaret, false); mutable_fish_features().set(features_t::stderr_nocaret, false);
if (redirection_type_for_string(L"^") != redirection_type_t::overwrite) if (get_redir_mode(L"^") != redirection_mode_t::overwrite)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
mutable_fish_features().set(features_t::stderr_nocaret, true); mutable_fish_features().set(features_t::stderr_nocaret, true);
if (redirection_type_for_string(L"^") != none()) if (get_redir_mode(L"^") != none())
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
mutable_fish_features() = saved_flags; mutable_fish_features() = saved_flags;
} }
@ -1004,8 +1029,8 @@ static void test_1_cancellation(const wchar_t *src) {
parser_t::principal_parser().eval(src, io_chain_t{filler}, TOP); parser_t::principal_parser().eval(src, io_chain_t{filler}, TOP);
auto buffer = io_bufferfill_t::finish(std::move(filler)); auto buffer = io_bufferfill_t::finish(std::move(filler));
if (buffer->buffer().size() != 0) { if (buffer->buffer().size() != 0) {
err(L"Expected 0 bytes in out_buff, but instead found %lu bytes\n", err(L"Expected 0 bytes in out_buff, but instead found %lu bytes, for command %ls\n",
buffer->buffer().size()); buffer->buffer().size(), src);
} }
iothread_drain_all(); iothread_drain_all();
} }
@ -5435,7 +5460,7 @@ int main(int argc, char **argv) {
if (should_test_function("format")) test_format(); if (should_test_function("format")) test_format();
if (should_test_function("convert")) test_convert(); if (should_test_function("convert")) test_convert();
if (should_test_function("convert_nulls")) test_convert_nulls(); if (should_test_function("convert_nulls")) test_convert_nulls();
if (should_test_function("tok")) test_tokenizer(); if (should_test_function("tokenizer")) test_tokenizer();
if (should_test_function("iothread")) test_iothread(); if (should_test_function("iothread")) test_iothread();
if (should_test_function("pthread")) test_pthread(); if (should_test_function("pthread")) test_pthread();
if (should_test_function("parser")) test_parser(); if (should_test_function("parser")) test_parser();

View file

@ -958,11 +958,12 @@ void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node)
if (redir_prim) { if (redir_prim) {
wcstring target; wcstring target;
const maybe_t<redirection_type_t> redirect_type = const maybe_t<pipe_or_redir_t> redirect =
redirection_type(redirection_node, this->buff, nullptr, &target); redirection_for_node(redirection_node, this->buff, &target);
// We may get a missing redirection type if the redirection is invalid. // We may get a missing redirection type if the redirection is invalid.
auto hl = redirect_type ? highlight_role_t::redirection : highlight_role_t::error; auto hl = (redirect && redirect->is_valid()) ? highlight_role_t::redirection
: highlight_role_t::error;
this->color_node(redir_prim, hl); this->color_node(redir_prim, hl);
// Check if the argument contains a command substitution. If so, highlight it as a param // Check if the argument contains a command substitution. If so, highlight it as a param
@ -974,7 +975,7 @@ void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node)
// disallow redirections into a non-existent directory. // disallow redirections into a non-existent directory.
bool target_is_valid = true; bool target_is_valid = true;
if (!redirect_type) { if (!redirect || !redirect->is_valid()) {
// not a valid redirection // not a valid redirection
target_is_valid = false; target_is_valid = false;
} else if (!this->io_ok) { } else if (!this->io_ok) {
@ -990,8 +991,8 @@ void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node)
// redirections). Note that the target is now unescaped. // redirections). Note that the target is now unescaped.
const wcstring target_path = const wcstring target_path =
path_apply_working_directory(target, this->working_directory); path_apply_working_directory(target, this->working_directory);
switch (*redirect_type) { switch (redirect->mode) {
case redirection_type_t::fd: { case redirection_mode_t::fd: {
if (target == L"-") { if (target == L"-") {
target_is_valid = true; target_is_valid = true;
} else { } else {
@ -1000,16 +1001,16 @@ void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node)
} }
break; break;
} }
case redirection_type_t::input: { case redirection_mode_t::input: {
// Input redirections must have a readable non-directory. // Input redirections must have a readable non-directory.
struct stat buf = {}; struct stat buf = {};
target_is_valid = !waccess(target_path, R_OK) && target_is_valid = !waccess(target_path, R_OK) &&
!wstat(target_path, &buf) && !S_ISDIR(buf.st_mode); !wstat(target_path, &buf) && !S_ISDIR(buf.st_mode);
break; break;
} }
case redirection_type_t::overwrite: case redirection_mode_t::overwrite:
case redirection_type_t::append: case redirection_mode_t::append:
case redirection_type_t::noclob: { case redirection_mode_t::noclob: {
// Test whether the file exists, and whether it's writable (possibly after // Test whether the file exists, and whether it's writable (possibly after
// creating it). access() returns failure if the file does not exist. // creating it). access() returns failure if the file does not exist.
bool file_exists = false, file_is_writable = false; bool file_exists = false, file_is_writable = false;
@ -1053,7 +1054,7 @@ void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node)
// NOCLOB means that we must not overwrite files that exist. // NOCLOB means that we must not overwrite files that exist.
target_is_valid = target_is_valid =
file_is_writable && file_is_writable &&
!(file_exists && redirect_type == redirection_type_t::noclob); !(file_exists && redirect->mode == redirection_mode_t::noclob);
break; break;
} }
} }

View file

@ -967,9 +967,15 @@ bool parse_execution_context_t::determine_io_chain(tnode_t<g::arguments_or_redir
// Get all redirection nodes underneath the statement. // Get all redirection nodes underneath the statement.
while (auto redirect_node = node.next_in_list<g::redirection>()) { while (auto redirect_node = node.next_in_list<g::redirection>()) {
int source_fd = -1; // source fd
wcstring target; // file path or target fd wcstring target; // file path or target fd
auto redirect_type = redirection_type(redirect_node, pstree->src, &source_fd, &target); auto redirect = redirection_for_node(redirect_node, pstree->src, &target);
if (!redirect || !redirect->is_valid()) {
// TODO: improve this error message.
report_error(redirect_node, _(L"Invalid redirection: %ls"),
redirect_node.get_source(pstree->src).c_str());
return false;
}
// PCA: I can't justify this skip_variables flag. It was like this when I got here. // PCA: I can't justify this skip_variables flag. It was like this when I got here.
bool target_expanded = bool target_expanded =
@ -977,17 +983,17 @@ bool parse_execution_context_t::determine_io_chain(tnode_t<g::arguments_or_redir
parser->vars(), parser->shared()); parser->vars(), parser->shared());
if (!target_expanded || target.empty()) { if (!target_expanded || target.empty()) {
// TODO: Improve this error message. // TODO: Improve this error message.
errored =
report_error(redirect_node, _(L"Invalid redirection target: %ls"), target.c_str()); report_error(redirect_node, _(L"Invalid redirection target: %ls"), target.c_str());
return false;
} }
// Generate the actual IO redirection. // Generate the actual IO redirection.
shared_ptr<io_data_t> new_io; shared_ptr<io_data_t> new_io;
assert(redirect_type && "expected to have a valid redirection"); assert(redirect && redirect->is_valid() && "expected to have a valid redirection");
switch (*redirect_type) { switch (redirect->mode) {
case redirection_type_t::fd: { case redirection_mode_t::fd: {
if (target == L"-") { if (target == L"-") {
new_io.reset(new io_close_t(source_fd)); new_io.reset(new io_close_t(redirect->fd));
} else { } else {
int old_fd = fish_wcstoi(target.c_str()); int old_fd = fish_wcstoi(target.c_str());
if (errno || old_fd < 0) { if (errno || old_fd < 0) {
@ -996,14 +1002,14 @@ bool parse_execution_context_t::determine_io_chain(tnode_t<g::arguments_or_redir
L"which is not a valid file descriptor"); L"which is not a valid file descriptor");
errored = report_error(redirect_node, fmt, target.c_str()); errored = report_error(redirect_node, fmt, target.c_str());
} else { } else {
new_io.reset(new io_fd_t(source_fd, old_fd, true)); new_io.reset(new io_fd_t(redirect->fd, old_fd, true));
} }
} }
break; break;
} }
default: { default: {
int oflags = oflags_for_redirection_type(*redirect_type); int oflags = redirect->oflags();
io_file_t *new_io_file = new io_file_t(source_fd, target, oflags); io_file_t *new_io_file = new io_file_t(redirect->fd, target, oflags);
new_io.reset(new_io_file); new_io.reset(new_io_file);
break; break;
} }
@ -1127,12 +1133,13 @@ parse_execution_result_t parse_execution_context_t::populate_job_from_job_node(
tnode_t<g::statement> statement = job_cont.require_get_child<g::statement, 2>(); tnode_t<g::statement> statement = job_cont.require_get_child<g::statement, 2>();
// Handle the pipe, whose fd may not be the obvious stdout. // Handle the pipe, whose fd may not be the obvious stdout.
int pipe_write_fd = fd_redirected_by_pipe(get_source(pipe)); auto parsed_pipe = pipe_or_redir_t::from_string(get_source(pipe));
if (pipe_write_fd == -1) { assert(parsed_pipe.has_value() && parsed_pipe->is_pipe && "Failed to parse valid pipe");
if (!parsed_pipe->is_valid()) {
result = report_error(pipe, ILLEGAL_FD_ERR_MSG, get_source(pipe).c_str()); result = report_error(pipe, ILLEGAL_FD_ERR_MSG, get_source(pipe).c_str());
break; break;
} }
processes.back()->pipe_write_fd = pipe_write_fd; processes.back()->pipe_write_fd = parsed_pipe->fd;
// Store the new process (and maybe with an error). // Store the new process (and maybe with an error).
processes.emplace_back(new process_t()); processes.emplace_back(new process_t());

View file

@ -55,19 +55,20 @@ enum parse_bool_statement_type_t bool_statement_type(
return static_cast<parse_bool_statement_type_t>(cont.tag()); return static_cast<parse_bool_statement_type_t>(cont.tag());
} }
maybe_t<redirection_type_t> redirection_type(tnode_t<grammar::redirection> redirection, maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
const wcstring &src, int *out_fd, const wcstring &src, wcstring *out_target) {
wcstring *out_target) {
assert(redirection && "redirection is missing"); assert(redirection && "redirection is missing");
maybe_t<redirection_type_t> result{};
tnode_t<grammar::tok_redirection> prim = redirection.child<0>(); // like 2> tnode_t<grammar::tok_redirection> prim = redirection.child<0>(); // like 2>
assert(prim && "expected to have primitive"); assert(prim && "expected to have primitive");
maybe_t<pipe_or_redir_t> result{};
if (prim.has_source()) { if (prim.has_source()) {
result = redirection_type_for_string(prim.get_source(src), out_fd); result = pipe_or_redir_t::from_string(prim.get_source(src));
assert(result.has_value() && "Failed to parse valid redirection");
assert(!result->is_pipe && "Should not be a pipe");
} }
if (out_target != NULL) { if (out_target != NULL) {
tnode_t<grammar::tok_string> target = redirection.child<1>(); // like &1 or file path tnode_t<grammar::tok_string> target = redirection.child<1>(); // like 1 or file path
*out_target = target.has_source() ? target.get_source(src) : wcstring(); *out_target = target.has_source() ? target.get_source(src) : wcstring();
} }
return result; return result;

View file

@ -242,10 +242,10 @@ enum parse_bool_statement_type_t bool_statement_type(tnode_t<grammar::job_decora
enum parse_bool_statement_type_t bool_statement_type( enum parse_bool_statement_type_t bool_statement_type(
tnode_t<grammar::job_conjunction_continuation> stmt); tnode_t<grammar::job_conjunction_continuation> stmt);
/// Given a redirection, get the redirection type (or none) and target (file path, or fd). /// Given a redirection node, get the parsed redirection and target of the redirection (file path,
maybe_t<redirection_type_t> redirection_type(tnode_t<grammar::redirection> redirection, /// or fd).
const wcstring &src, int *out_fd, maybe_t<pipe_or_redir_t> redirection_for_node(tnode_t<grammar::redirection> redirection,
wcstring *out_target); const wcstring &src, wcstring *out_target);
/// Return the arguments under an arguments_list or arguments_or_redirection_list /// Return the arguments under an arguments_list or arguments_or_redirection_list
/// Do not return more than max. /// Do not return more than max.

View file

@ -273,145 +273,172 @@ tok_t tokenizer_t::read_string() {
return result; return result;
} }
// Reads a redirection or an "fd pipe" (like 2>|) from a string. // Parse an fd from the non-empty string [start, end), all of which are digits.
// Returns the parsed pipe or redirection, or none() on error. // Return the fd, or -1 on overflow.
struct parsed_redir_or_pipe_t { static int parse_fd(const wchar_t *start, const wchar_t *end) {
// Number of characters consumed. assert(start < end && "String cannot be empty");
size_t consumed{0};
// The token type, always either TOK_PIPE or TOK_REDIRECT.
token_type_t type{token_type_t::redirect};
// The redirection mode if the type is TOK_REDIRECT.
redirection_type_t redirection_mode{redirection_type_t::overwrite};
// The redirected fd, or -1 on overflow.
int fd{0};
};
static maybe_t<parsed_redir_or_pipe_t> read_redirection_or_fd_pipe(const wchar_t *buff) {
bool errored = false;
parsed_redir_or_pipe_t result;
size_t idx = 0;
// Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like
// '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the
// first character. Watch out for overflow.
long long big_fd = 0; long long big_fd = 0;
for (; iswdigit(buff[idx]); idx++) { for (const wchar_t *cursor = start; cursor < end; ++cursor) {
// Note that it's important we consume all the digits here, even if it overflows. assert(L'0' <= *cursor && *cursor <= L'9' && "Not a digit");
if (big_fd <= INT_MAX) big_fd = big_fd * 10 + (buff[idx] - L'0'); big_fd = big_fd * 10 + (*cursor - L'0');
if (big_fd > INT_MAX) return -1;
}
assert(big_fd <= INT_MAX && "big_fd should be in range");
return static_cast<int>(big_fd);
} }
result.fd = (big_fd > INT_MAX ? -1 : static_cast<int>(big_fd)); pipe_or_redir_t::pipe_or_redir_t() = default;
if (idx == 0) { maybe_t<pipe_or_redir_t> pipe_or_redir_t::from_string(const wchar_t *buff) {
// We did not find a leading digit, so there's no explicit fd. Infer it from the type. pipe_or_redir_t result{};
switch (buff[idx]) {
case L'>': { /* Examples of supported syntaxes.
Note we are only responsible for parsing the redirection part, not 'cmd' or 'file'.
cmd | cmd normal pipe
cmd >| cmd pipe with explicit fd
cmd 2>| cmd pipe with explicit fd
cmd < file stdin redirection
cmd > file redirection
cmd >> file appending redirection
cmd >? file noclobber redirection
cmd >>? file appending noclobber redirection
cmd 2> file file redirection with explicit fd
cmd >&2 file fd redirection with no explicit src fd (stdout is used)
cmd 1>&2 file fd redirection with an explicit src fd
cmd <&2 file fd redirection with no explicit src fd (stdin is used)
cmd 3<&0 file fd redirection with an explicit src fd
cmd ^ file caret (stderr) redirection, perhaps disabled via feature flags
cmd ^^ file caret (stderr) redirection, perhaps disabled via feature flags
*/
const wchar_t *cursor = buff;
// Extract a range of leading fd.
const wchar_t *fd_start = cursor;
while (iswdigit(*cursor)) cursor++;
const wchar_t *fd_end = cursor;
bool has_fd = (fd_end > fd_start);
// Try consuming a given character.
// Return true if consumed. On success, advances cursor.
auto try_consume = [&cursor](wchar_t c) -> bool {
if (*cursor != c) return false;
cursor++;
return true;
};
// Like try_consume, but asserts on failure.
auto consume = [&](wchar_t c) {
assert(*cursor == c && "Failed to consume char");
cursor++;
};
switch (*cursor) {
case L'|': {
if (has_fd) {
// Like 123|
return none();
}
consume(L'|');
assert(*cursor != L'|' &&
"|| passed as redirection, this should have been handled as 'or' by the caller");
result.fd = STDOUT_FILENO; result.fd = STDOUT_FILENO;
result.is_pipe = true;
break;
}
case L'>': {
consume(L'>');
if (try_consume(L'|')) {
// Note we differ from bash here.
// Consider `echo foo 2>| bar`
// In fish, this is a *pipe*. Run bar as a command and attach foo's stderr to bar's
// stdin, while leaving stdout as tty.
// In bash, this is a *redirection* to bar as a file. It is like > but ignores
// noclobber.
result.is_pipe = true;
result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 2>|
: STDOUT_FILENO; // like >|
} else if (try_consume(L'&')) {
// This is a redirection to an fd.
result.mode = redirection_mode_t::fd;
result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 1>&2
: STDOUT_FILENO; // like >&2
} else {
// This is a redirection to a file.
result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 1> file.txt
: STDOUT_FILENO; // like > file.txt
// Note 'echo abc >>? file' is valid: it means append and noclobber.
// But here "noclobber" means the file must not exist, so appending
// can be ignored.
result.mode = redirection_mode_t::overwrite;
if (try_consume(L'>')) result.mode = redirection_mode_t::append;
if (try_consume(L'?')) result.mode = redirection_mode_t::noclob;
}
break; break;
} }
case L'<': { case L'<': {
result.fd = STDIN_FILENO; consume(L'<');
if (try_consume('&')) {
result.mode = redirection_mode_t::fd;
} else {
result.mode = redirection_mode_t::input;
}
result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 1<&3 or 1< /tmp/file.txt
: STDIN_FILENO; // like <&3 or < /tmp/file.txt
break; break;
} }
case L'^': { case L'^': {
if (caret_redirs()) { if (!caret_redirs()) {
result.fd = STDERR_FILENO; // ^ is not special if caret_redirs is disabled.
return none();
} else { } else {
errored = true; if (has_fd) {
}
break;
}
default: {
errored = true;
break;
}
}
}
// Either way we should have ended on the redirection character itself like '>'.
// Don't allow an fd with a caret redirection - see #1873
wchar_t redirect_char = buff[idx++]; // note increment of idx
if (redirect_char == L'>' || (redirect_char == L'^' && idx == 1 && caret_redirs())) {
result.redirection_mode = redirection_type_t::overwrite;
if (buff[idx] == redirect_char) {
// Doubled up like ^^ or >>. That means append.
result.redirection_mode = redirection_type_t::append;
idx++;
}
} else if (redirect_char == L'<') {
result.redirection_mode = redirection_type_t::input;
} else {
// Something else.
errored = true;
}
// Bail on error.
if (errored) {
return none(); return none();
} }
consume(L'^');
// Optional characters like & or ?, or the pipe char |. result.fd = STDERR_FILENO;
wchar_t opt_char = buff[idx]; result.mode = redirection_mode_t::overwrite;
if (opt_char == L'&') { if (try_consume(L'^')) result.mode = redirection_mode_t::append;
result.redirection_mode = redirection_type_t::fd; if (try_consume(L'?')) result.mode = redirection_mode_t::noclob;
idx++; break;
} else if (opt_char == L'?') { }
result.redirection_mode = redirection_type_t::noclob;
idx++;
} else if (opt_char == L'|') {
// So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets
// handled elsewhere.
result.type = token_type_t::pipe;
idx++;
} }
result.consumed = idx; default: {
// Not a redirection.
return none();
}
}
result.consumed = (cursor - buff);
assert(result.consumed > 0 && "Should have consumed at least one character on success");
return result; return result;
} }
maybe_t<redirection_type_t> redirection_type_for_string(const wcstring &str, int *out_fd) { int pipe_or_redir_t::oflags() const {
auto v = read_redirection_or_fd_pipe(str.c_str()); switch (mode) {
// Redirections only, no pipes. case redirection_mode_t::append: {
if (!v || v->type != token_type_t::redirect || v->fd < 0) return none();
if (out_fd) *out_fd = v->fd;
return v->redirection_mode;
}
int fd_redirected_by_pipe(const wcstring &str) {
// Hack for the common case.
if (str == L"|") {
return STDOUT_FILENO;
}
auto v = read_redirection_or_fd_pipe(str.c_str());
return (v && v->type == token_type_t::pipe) ? v->fd : -1;
}
int oflags_for_redirection_type(redirection_type_t type) {
switch (type) {
case redirection_type_t::append: {
return O_CREAT | O_APPEND | O_WRONLY; return O_CREAT | O_APPEND | O_WRONLY;
} }
case redirection_type_t::overwrite: { case redirection_mode_t::overwrite: {
return O_CREAT | O_WRONLY | O_TRUNC; return O_CREAT | O_WRONLY | O_TRUNC;
} }
case redirection_type_t::noclob: { case redirection_mode_t::noclob: {
return O_CREAT | O_EXCL | O_WRONLY; return O_CREAT | O_EXCL | O_WRONLY;
} }
case redirection_type_t::input: { case redirection_mode_t::input: {
return O_RDONLY; return O_RDONLY;
} }
case redirection_type_t::fd: case redirection_mode_t::fd:
default: { default: {
return -1; return -1;
} }
} }
} }
/// Test if a character is whitespace. Differs from iswspace in that it does not consider a newline /// Test if a character is whitespace. Differs from iswspace in that it does not consider a
/// to be whitespace. /// newline to be whitespace.
static bool iswspace_not_nl(wchar_t c) { static bool iswspace_not_nl(wchar_t c) {
switch (c) { switch (c) {
case L' ': case L' ':
@ -430,7 +457,8 @@ maybe_t<tok_t> tokenizer_t::next() {
return none(); return none();
} }
// Consume non-newline whitespace. If we get an escaped newline, mark it and continue past it. // Consume non-newline whitespace. If we get an escaped newline, mark it and continue past
// it.
bool preceding_escaped_nl = false; bool preceding_escaped_nl = false;
for (;;) { for (;;) {
if (this->buff[0] == L'\\' && this->buff[1] == L'\n') { if (this->buff[0] == L'\\' && this->buff[1] == L'\n') {
@ -512,23 +540,27 @@ maybe_t<tok_t> tokenizer_t::next() {
result->length = 2; result->length = 2;
this->buff += 2; this->buff += 2;
} else { } else {
result.emplace(token_type_t::pipe); auto pipe = pipe_or_redir_t::from_string(buff);
assert(pipe.has_value() && pipe->is_pipe &&
"Should always succeed to parse a | pipe");
result.emplace(pipe->token_type());
result->offset = start_pos; result->offset = start_pos;
result->length = 1; result->length = pipe->consumed;
this->buff++; this->buff += pipe->consumed;
} }
break; break;
} }
case L'>': case L'>':
case L'<': { case L'<': {
// There's some duplication with the code in the default case below. The key difference // There's some duplication with the code in the default case below. The key
// here is that we must never parse these as a string; a failed redirection is an error! // difference here is that we must never parse these as a string; a failed
auto redir_or_pipe = read_redirection_or_fd_pipe(this->buff); // redirection is an error!
auto redir_or_pipe = pipe_or_redir_t::from_string(this->buff);
if (!redir_or_pipe || redir_or_pipe->fd < 0) { if (!redir_or_pipe || redir_or_pipe->fd < 0) {
return this->call_error(tokenizer_error_t::invalid_redirect, this->buff, return this->call_error(tokenizer_error_t::invalid_redirect, this->buff,
this->buff); this->buff);
} }
result.emplace(redir_or_pipe->type); result.emplace(redir_or_pipe->token_type());
result->offset = start_pos; result->offset = start_pos;
result->length = redir_or_pipe->consumed; result->length = redir_or_pipe->consumed;
this->buff += redir_or_pipe->consumed; this->buff += redir_or_pipe->consumed;
@ -537,20 +569,20 @@ maybe_t<tok_t> tokenizer_t::next() {
default: { default: {
// Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string. // Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string.
const wchar_t *error_location = this->buff; const wchar_t *error_location = this->buff;
maybe_t<parsed_redir_or_pipe_t> redir_or_pipe; maybe_t<pipe_or_redir_t> redir_or_pipe{};
if (iswdigit(*this->buff) || (*this->buff == L'^' && caret_redirs())) { if (iswdigit(*this->buff) || (*this->buff == L'^' && caret_redirs())) {
redir_or_pipe = read_redirection_or_fd_pipe(this->buff); redir_or_pipe = pipe_or_redir_t::from_string(this->buff);
} }
if (redir_or_pipe && redir_or_pipe->consumed > 0) { if (redir_or_pipe) {
// It looks like a redirection or a pipe. But we don't support piping fd 0. Note // It looks like a redirection or a pipe. But we don't support piping fd 0. Note
// that the fd may be -1, indicating overflow; but we don't treat that as a tokenizer // that the fd may be -1, indicating overflow; but we don't treat that as a
// error. // tokenizer error.
if (redir_or_pipe->type == token_type_t::pipe && redir_or_pipe->fd == 0) { if (redir_or_pipe->is_pipe && redir_or_pipe->fd == 0) {
return this->call_error(tokenizer_error_t::invalid_pipe, error_location, return this->call_error(tokenizer_error_t::invalid_pipe, error_location,
error_location); error_location);
} }
result.emplace(redir_or_pipe->type); result.emplace(redir_or_pipe->token_type());
result->offset = start_pos; result->offset = start_pos;
result->length = redir_or_pipe->consumed; result->length = redir_or_pipe->consumed;
this->buff += redir_or_pipe->consumed; this->buff += redir_or_pipe->consumed;
@ -637,7 +669,8 @@ bool move_word_state_machine_t::consume_char_punctuation(wchar_t c) {
bool move_word_state_machine_t::is_path_component_character(wchar_t c) { bool move_word_state_machine_t::is_path_component_character(wchar_t c) {
// Always treat separators as first. All this does is ensure that we treat ^ as a string // Always treat separators as first. All this does is ensure that we treat ^ as a string
// character instead of as stderr redirection, which I hypothesize is usually what is desired. // character instead of as stderr redirection, which I hypothesize is usually what is
// desired.
return tok_is_string_character(c, true) && !std::wcschr(L"/={,}'\":@", c); return tok_is_string_character(c, true) && !std::wcschr(L"/={,}'\":@", c);
} }

View file

@ -22,7 +22,7 @@ enum class token_type_t {
comment, /// comment token comment, /// comment token
}; };
enum class redirection_type_t { enum class redirection_mode_t {
overwrite, // normal redirection: > file.txt overwrite, // normal redirection: > file.txt
append, // appending redirection: >> file.txt append, // appending redirection: >> file.txt
input, // input redirection: < file.txt input, // input redirection: < file.txt
@ -138,14 +138,44 @@ class tokenizer_t {
/// returns the empty string. /// returns the empty string.
wcstring tok_first(const wcstring &str); wcstring tok_first(const wcstring &str);
/// Helper function to determine redirection type from a string. Also returns the fd by reference. /// Struct wrapping up a parsed pipe or redirection.
maybe_t<redirection_type_t> redirection_type_for_string(const wcstring &str, int *out_fd = NULL); struct pipe_or_redir_t {
// The redirected fd, or -1 on overflow.
// In the common case of a pipe, this is 1 (STDOUT_FILENO).
// For example, in the case of "3>&1" this will be 3.
int fd{0};
/// Helper function to determine which fd is redirected by a pipe. // Whether we are a pipe (true) or redirection (false).
int fd_redirected_by_pipe(const wcstring &str); bool is_pipe{false};
/// Helper function to return oflags (as in open(2)) for a redirection type. // The redirection mode if the type is redirect.
int oflags_for_redirection_type(redirection_type_t type); // Ignored for pipes.
redirection_mode_t mode{redirection_mode_t::overwrite};
// Number of characters consumed when parsing the string.
size_t consumed{0};
// Construct from a string.
static maybe_t<pipe_or_redir_t> from_string(const wchar_t *buff);
static maybe_t<pipe_or_redir_t> from_string(const wcstring &buff) {
return from_string(buff.c_str());
}
// \return the oflags (as in open(2)) for this redirection.
int oflags() const;
// \return if we are "valid". Here "valid" means only that the source fd did not overflow.
// For example 99999999999> is invalid.
bool is_valid() const { return fd >= 0; }
// \return the token type for this redirection.
token_type_t token_type() const {
return is_pipe ? token_type_t::pipe : token_type_t::redirect;
}
private:
pipe_or_redir_t();
};
enum move_word_style_t { enum move_word_style_t {
move_word_style_punctuation, // stop at punctuation move_word_style_punctuation, // stop at punctuation