Refactor how redirections are represented by the tokenizer

Prior to this fix, each redirection type was a separate token_type.
Unify these under a single type TOK_REDIRECT and break the redirection
type out into a new sub-type redirection_type_t.
This commit is contained in:
ridiculousfish 2018-02-23 15:19:58 -08:00
parent 6673fe5457
commit 99fb7bb6aa
8 changed files with 131 additions and 147 deletions

View file

@ -536,10 +536,9 @@ static void test_tokenizer() {
L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells "
L"){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect "
L"Compress_Newlines\n \n\t\n \nInto_Just_One";
const int types[] = {TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD,
TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT,
TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END,
TOK_STRING};
const int types[] = {TOK_STRING, TOK_REDIRECT, TOK_STRING, TOK_REDIRECT, TOK_STRING,
TOK_STRING, TOK_STRING, TOK_REDIRECT, TOK_REDIRECT, TOK_STRING,
TOK_STRING, TOK_END, TOK_STRING};
say(L"Test correct tokenization");
@ -594,25 +593,25 @@ static void test_tokenizer() {
}
// Test redirection_type_for_string.
if (redirection_type_for_string(L"<") != TOK_REDIRECT_IN)
if (redirection_type_for_string(L"<") != redirection_type_t::input)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"^") != TOK_REDIRECT_OUT)
if (redirection_type_for_string(L"^") != redirection_type_t::overwrite)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L">") != TOK_REDIRECT_OUT)
if (redirection_type_for_string(L">") != redirection_type_t::overwrite)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>") != TOK_REDIRECT_OUT)
if (redirection_type_for_string(L"2>") != redirection_type_t::overwrite)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L">>") != TOK_REDIRECT_APPEND)
if (redirection_type_for_string(L">>") != redirection_type_t::append)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>>") != TOK_REDIRECT_APPEND)
if (redirection_type_for_string(L"2>>") != redirection_type_t::append)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>?") != TOK_REDIRECT_NOCLOB)
if (redirection_type_for_string(L"2>?") != redirection_type_t::noclob)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"9999999999999999>?") != TOK_NONE)
if (redirection_type_for_string(L"9999999999999999>?"))
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>&3") != TOK_REDIRECT_FD)
if (redirection_type_for_string(L"2>&3") != redirection_type_t::fd)
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
if (redirection_type_for_string(L"2>|") != TOK_NONE)
if (redirection_type_for_string(L"2>|"))
err(L"redirection_type_for_string failed on line %ld", (long)__LINE__);
}

View file

@ -836,11 +836,11 @@ void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node)
if (redir_prim) {
wcstring target;
const enum token_type redirect_type =
const maybe_t<redirection_type_t> redirect_type =
redirection_type(redirection_node, this->buff, nullptr, &target);
// We may get a TOK_NONE redirection type, e.g. if the redirection is invalid.
auto hl = redirect_type == TOK_NONE ? highlight_spec_error : highlight_spec_redirection;
// We may get a missing redirection type if the redirection is invalid.
auto hl = redirect_type ? highlight_spec_redirection : highlight_spec_error;
this->color_node(redir_prim, hl);
// Check if the argument contains a command substitution. If so, highlight it as a param
@ -852,7 +852,10 @@ void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node)
// disallow redirections into a non-existent directory.
bool target_is_valid = true;
if (!this->io_ok) {
if (!redirect_type) {
// not a valid redirection
target_is_valid = false;
} else if (!this->io_ok) {
// I/O is disallowed, so we don't have much hope of catching anything but gross
// errors. Assume it's valid.
target_is_valid = true;
@ -865,22 +868,22 @@ void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node)
// redirections). Note that the target is now unescaped.
const wcstring target_path =
path_apply_working_directory(target, this->working_directory);
switch (redirect_type) {
case TOK_REDIRECT_FD: {
switch (*redirect_type) {
case redirection_type_t::fd: {
int fd = fish_wcstoi(target.c_str());
target_is_valid = !errno && fd >= 0;
break;
}
case TOK_REDIRECT_IN: {
case redirection_type_t::input: {
// Input redirections must have a readable non-directory.
struct stat buf = {};
target_is_valid = !waccess(target_path, R_OK) &&
!wstat(target_path, &buf) && !S_ISDIR(buf.st_mode);
break;
}
case TOK_REDIRECT_OUT:
case TOK_REDIRECT_APPEND:
case TOK_REDIRECT_NOCLOB: {
case redirection_type_t::overwrite:
case redirection_type_t::append:
case redirection_type_t::noclob: {
// Test whether the file exists, and whether it's writable (possibly after
// creating it). access() returns failure if the file does not exist.
bool file_exists = false, file_is_writable = false;
@ -922,14 +925,9 @@ void highlighter_t::color_redirection(tnode_t<g::redirection> redirection_node)
}
// NOCLOB means that we must not overwrite files that exist.
target_is_valid = file_is_writable &&
!(file_exists && redirect_type == TOK_REDIRECT_NOCLOB);
break;
}
default: {
// We should not get here, since the node was marked as a redirection, but
// treat it as an error for paranoia.
target_is_valid = false;
target_is_valid =
file_is_writable &&
!(file_exists && redirect_type == redirection_type_t::noclob);
break;
}
}

View file

@ -871,8 +871,7 @@ bool parse_execution_context_t::determine_io_chain(tnode_t<g::arguments_or_redir
while (auto redirect_node = node.next_in_list<g::redirection>()) {
int source_fd = -1; // source fd
wcstring target; // file path or target fd
enum token_type redirect_type =
redirection_type(redirect_node, pstree->src, &source_fd, &target);
auto redirect_type = redirection_type(redirect_node, pstree->src, &source_fd, &target);
// PCA: I can't justify this EXPAND_SKIP_VARIABLES flag. It was like this when I got here.
bool target_expanded = expand_one(target, no_exec ? EXPAND_SKIP_VARIABLES : 0, NULL);
@ -884,9 +883,9 @@ bool parse_execution_context_t::determine_io_chain(tnode_t<g::arguments_or_redir
// Generate the actual IO redirection.
shared_ptr<io_data_t> new_io;
assert(redirect_type != TOK_NONE);
switch (redirect_type) {
case TOK_REDIRECT_FD: {
assert(redirect_type && "expected to have a valid redirection");
switch (*redirect_type) {
case redirection_type_t::fd: {
if (target == L"-") {
new_io.reset(new io_close_t(source_fd));
} else {
@ -902,21 +901,12 @@ bool parse_execution_context_t::determine_io_chain(tnode_t<g::arguments_or_redir
}
break;
}
case TOK_REDIRECT_OUT:
case TOK_REDIRECT_APPEND:
case TOK_REDIRECT_IN:
case TOK_REDIRECT_NOCLOB: {
int oflags = oflags_for_redirection_type(redirect_type);
default: {
int oflags = oflags_for_redirection_type(*redirect_type);
io_file_t *new_io_file = new io_file_t(source_fd, target, oflags);
new_io.reset(new_io_file);
break;
}
default: {
// Should be unreachable.
debug(0, "Unexpected redirection type %ld.", (long)redirect_type);
PARSER_DIE();
break;
}
}
// Append the new_io if we got one.

View file

@ -231,11 +231,7 @@ static inline parse_token_type_t parse_token_type_from_tokenizer_token(
result = parse_token_type_background;
break;
}
case TOK_REDIRECT_OUT:
case TOK_REDIRECT_APPEND:
case TOK_REDIRECT_IN:
case TOK_REDIRECT_FD:
case TOK_REDIRECT_NOCLOB: {
case TOK_REDIRECT: {
result = parse_token_type_redirection;
break;
}

View file

@ -50,10 +50,11 @@ enum parse_bool_statement_type_t bool_statement_type(tnode_t<grammar::boolean_st
return static_cast<parse_bool_statement_type_t>(stmt.tag());
}
enum token_type redirection_type(tnode_t<grammar::redirection> redirection, const wcstring &src,
int *out_fd, wcstring *out_target) {
maybe_t<redirection_type_t> redirection_type(tnode_t<grammar::redirection> redirection,
const wcstring &src, int *out_fd,
wcstring *out_target) {
assert(redirection && "redirection is missing");
enum token_type result = TOK_NONE;
maybe_t<redirection_type_t> result{};
tnode_t<grammar::tok_redirection> prim = redirection.child<0>(); // like 2>
assert(prim && "expected to have primitive");

View file

@ -234,9 +234,10 @@ parse_statement_decoration_t get_decoration(tnode_t<grammar::plain_statement> st
/// Return the type for a boolean statement.
enum parse_bool_statement_type_t bool_statement_type(tnode_t<grammar::boolean_statement> stmt);
/// Given a redirection, get the redirection type (or TOK_NONE) and target (file path, or fd).
enum token_type redirection_type(tnode_t<grammar::redirection> redirection, const wcstring &src,
int *out_fd, wcstring *out_target);
/// Given a redirection, get the redirection type (or none) and target (file path, or fd).
maybe_t<redirection_type_t> redirection_type(tnode_t<grammar::redirection> redirection,
const wcstring &src, int *out_fd,
wcstring *out_target);
/// Return the arguments under an arguments_list or arguments_or_redirection_list
/// Do not return more than max.

View file

@ -323,15 +323,25 @@ tok_t tokenizer_t::read_string() {
return result;
}
/// Reads a redirection or an "fd pipe" (like 2>|) from a string. Returns how many characters were
/// consumed. If zero, then this string was not a redirection. Also returns by reference the
/// redirection mode, and the fd to redirection. If there is overflow, *out_fd is set to -1.
static size_t read_redirection_or_fd_pipe(const wchar_t *buff,
enum token_type *out_redirection_mode, int *out_fd) {
bool errored = false;
int fd = 0;
enum token_type redirection_mode = TOK_NONE;
// Reads a redirection or an "fd pipe" (like 2>|) from a string.
// Returns the parsed pipe or redirection, or none() on error.
struct parsed_redir_or_pipe_t {
// Number of characters consumed.
size_t consumed{0};
// The token type, always either TOK_PIPE or TOK_REDIRECT.
token_type type{TOK_REDIRECT};
// The redirection mode if the type is TOK_REDIRECT.
redirection_type_t redirection_mode{redirection_type_t::overwrite};
// The redirected fd, or -1 on overflow.
int fd{0};
};
static maybe_t<parsed_redir_or_pipe_t> read_redirection_or_fd_pipe(const wchar_t *buff) {
bool errored = false;
parsed_redir_or_pipe_t result;
size_t idx = 0;
// Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like
@ -343,21 +353,21 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff,
if (big_fd <= INT_MAX) big_fd = big_fd * 10 + (buff[idx] - L'0');
}
fd = (big_fd > INT_MAX ? -1 : static_cast<int>(big_fd));
result.fd = (big_fd > INT_MAX ? -1 : static_cast<int>(big_fd));
if (idx == 0) {
// We did not find a leading digit, so there's no explicit fd. Infer it from the type.
switch (buff[idx]) {
case L'>': {
fd = STDOUT_FILENO;
result.fd = STDOUT_FILENO;
break;
}
case L'<': {
fd = STDIN_FILENO;
result.fd = STDIN_FILENO;
break;
}
case L'^': {
fd = STDERR_FILENO;
result.fd = STDERR_FILENO;
break;
}
default: {
@ -371,55 +381,49 @@ static size_t read_redirection_or_fd_pipe(const wchar_t *buff,
// Don't allow an fd with a caret redirection - see #1873
wchar_t redirect_char = buff[idx++]; // note increment of idx
if (redirect_char == L'>' || (redirect_char == L'^' && idx == 1)) {
redirection_mode = TOK_REDIRECT_OUT;
result.redirection_mode = redirection_type_t::overwrite;
if (buff[idx] == redirect_char) {
// Doubled up like ^^ or >>. That means append.
redirection_mode = TOK_REDIRECT_APPEND;
result.redirection_mode = redirection_type_t::append;
idx++;
}
} else if (redirect_char == L'<') {
redirection_mode = TOK_REDIRECT_IN;
result.redirection_mode = redirection_type_t::input;
} else {
// Something else.
errored = true;
}
// Don't return valid-looking stuff on error.
// Bail on error.
if (errored) {
idx = 0;
redirection_mode = TOK_NONE;
} else {
// Optional characters like & or ?, or the pipe char |.
wchar_t opt_char = buff[idx];
if (opt_char == L'&') {
redirection_mode = TOK_REDIRECT_FD;
idx++;
} else if (opt_char == L'?') {
redirection_mode = TOK_REDIRECT_NOCLOB;
idx++;
} else if (opt_char == L'|') {
// So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets
// handled elsewhere.
redirection_mode = TOK_PIPE;
idx++;
}
return none();
}
// Return stuff.
if (out_redirection_mode != NULL) *out_redirection_mode = redirection_mode;
if (out_fd != NULL) *out_fd = fd;
// Optional characters like & or ?, or the pipe char |.
wchar_t opt_char = buff[idx];
if (opt_char == L'&') {
result.redirection_mode = redirection_type_t::fd;
idx++;
} else if (opt_char == L'?') {
result.redirection_mode = redirection_type_t::noclob;
idx++;
} else if (opt_char == L'|') {
// So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets
// handled elsewhere.
result.type = TOK_PIPE;
idx++;
}
return idx;
result.consumed = idx;
return result;
}
enum token_type redirection_type_for_string(const wcstring &str, int *out_fd) {
enum token_type mode = TOK_NONE;
int fd = 0;
read_redirection_or_fd_pipe(str.c_str(), &mode, &fd);
maybe_t<redirection_type_t> redirection_type_for_string(const wcstring &str, int *out_fd) {
auto v = read_redirection_or_fd_pipe(str.c_str());
// Redirections only, no pipes.
if (mode == TOK_PIPE || fd < 0) mode = TOK_NONE;
if (out_fd != NULL) *out_fd = fd;
return mode;
if (!v || v->type != TOK_REDIRECT || v->fd < 0) return none();
if (out_fd) *out_fd = v->fd;
return v->redirection_mode;
}
int fd_redirected_by_pipe(const wcstring &str) {
@ -427,27 +431,22 @@ int fd_redirected_by_pipe(const wcstring &str) {
if (str == L"|") {
return STDOUT_FILENO;
}
enum token_type mode = TOK_NONE;
int fd = 0;
read_redirection_or_fd_pipe(str.c_str(), &mode, &fd);
// Pipes only.
if (mode != TOK_PIPE || fd < 0) fd = -1;
return fd;
auto v = read_redirection_or_fd_pipe(str.c_str());
return (v && v->type == TOK_PIPE) ? v->fd : -1;
}
int oflags_for_redirection_type(enum token_type type) {
int oflags_for_redirection_type(redirection_type_t type) {
switch (type) {
case TOK_REDIRECT_APPEND: {
case redirection_type_t::append: {
return O_CREAT | O_APPEND | O_WRONLY;
}
case TOK_REDIRECT_OUT: {
case redirection_type_t::overwrite: {
return O_CREAT | O_WRONLY | O_TRUNC;
}
case TOK_REDIRECT_NOCLOB: {
case redirection_type_t::noclob: {
return O_CREAT | O_EXCL | O_WRONLY;
}
case TOK_REDIRECT_IN: {
case redirection_type_t::input: {
return O_RDONLY;
}
default: { return -1; }
@ -551,39 +550,35 @@ maybe_t<tok_t> tokenizer_t::tok_next() {
case L'^': {
// There's some duplication with the code in the default case below. The key difference
// here is that we must never parse these as a string; a failed redirection is an error!
enum token_type mode = TOK_NONE;
int fd = -1;
size_t consumed = read_redirection_or_fd_pipe(this->buff, &mode, &fd);
if (consumed == 0 || fd < 0) {
auto redir_or_pipe = read_redirection_or_fd_pipe(this->buff);
if (!redir_or_pipe || redir_or_pipe->fd < 0) {
return this->call_error(TOK_INVALID_REDIRECT, this->buff, this->buff);
}
result.type = mode;
result.redirected_fd = fd;
result.length = consumed;
this->buff += consumed;
result.type = redir_or_pipe->type;
result.redirected_fd = redir_or_pipe->fd;
result.length = redir_or_pipe->consumed;
this->buff += redir_or_pipe->consumed;
break;
}
default: {
// Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string.
const wchar_t *error_location = this->buff;
size_t consumed = 0;
enum token_type mode = TOK_NONE;
int fd = -1;
maybe_t<parsed_redir_or_pipe_t> redir_or_pipe;
if (iswdigit(*this->buff)) {
consumed = read_redirection_or_fd_pipe(this->buff, &mode, &fd);
redir_or_pipe = read_redirection_or_fd_pipe(this->buff);
}
if (consumed > 0) {
if (redir_or_pipe && redir_or_pipe->consumed > 0) {
// It looks like a redirection or a pipe. But we don't support piping fd 0. Note
// that fd 0 may be -1, indicating overflow; but we don't treat that as a tokenizer
// error.
if (mode == TOK_PIPE && fd == 0) {
if (redir_or_pipe->type == TOK_PIPE && redir_or_pipe->fd == 0) {
return this->call_error(TOK_INVALID_PIPE, error_location, error_location);
}
result.type = mode;
result.redirected_fd = fd;
result.length = consumed;
this->buff += consumed;
result.type = redir_or_pipe->type;
result.redirected_fd = redir_or_pipe->fd;
result.length = redir_or_pipe->consumed;
this->buff += redir_or_pipe->consumed;
} else {
// Not a redirection or pipe, so just a string.
result = this->read_string();

View file

@ -10,18 +10,14 @@
/// Token types.
enum token_type {
TOK_NONE, /// Tokenizer not yet constructed
TOK_ERROR, /// Error reading token
TOK_STRING, /// String token
TOK_PIPE, /// Pipe token
TOK_END, /// End token (semicolon or newline, not literal end)
TOK_REDIRECT_OUT, /// redirection token
TOK_REDIRECT_APPEND, /// redirection append token
TOK_REDIRECT_IN, /// input redirection token
TOK_REDIRECT_FD, /// redirection to new fd token
TOK_REDIRECT_NOCLOB, /// redirection token
TOK_BACKGROUND, /// send job to bg token
TOK_COMMENT /// comment token
TOK_NONE, /// Tokenizer not yet constructed
TOK_ERROR, /// Error reading token
TOK_STRING, /// String token
TOK_PIPE, /// Pipe token
TOK_END, /// End token (semicolon or newline, not literal end)
TOK_REDIRECT, /// redirection token
TOK_BACKGROUND, /// send job to bg token
TOK_COMMENT /// comment token
};
/// Tokenizer error types.
@ -35,6 +31,14 @@ enum tokenizer_error {
TOK_INVALID_PIPE
};
enum class redirection_type_t {
overwrite, // normal redirection: > file.txt
append, // appending redirection: >> file.txt
input, // input redirection: < file.txt
fd, // fd redirection: 2>&1
noclob // noclobber redirection: >? file.txt
};
/// Flag telling the tokenizer to accept incomplete parameters, i.e. parameters with mismatching
/// paranthesis, etc. This is useful for tab-completion.
#define TOK_ACCEPT_UNFINISHED 1
@ -127,13 +131,13 @@ wcstring tok_first(const wcstring &str);
/// Helper function to determine redirection type from a string, or TOK_NONE if the redirection is
/// invalid. Also returns the fd by reference.
enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL);
maybe_t<redirection_type_t> redirection_type_for_string(const wcstring &str, int *out_fd = NULL);
/// Helper function to determine which fd is redirected by a pipe.
int fd_redirected_by_pipe(const wcstring &str);
/// Helper function to return oflags (as in open(2)) for a redirection type.
int oflags_for_redirection_type(enum token_type type);
int oflags_for_redirection_type(redirection_type_t type);
enum move_word_style_t {
move_word_style_punctuation, // stop at punctuation