mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-28 04:35:09 +00:00
Make { and } valid, first-class tokenizer elements
This commit is contained in:
parent
7447432471
commit
00f95a978e
6 changed files with 89 additions and 89 deletions
|
@ -47,6 +47,7 @@
|
|||
#include "proc.h"
|
||||
#include "reader.h"
|
||||
#include "wildcard.h"
|
||||
#include "wcstringutil.h"
|
||||
#include "wutil.h" // IWYU pragma: keep
|
||||
#ifdef KERN_PROCARGS2
|
||||
#else
|
||||
|
@ -941,7 +942,8 @@ static expand_error_t expand_braces(const wcstring &instr, expand_flags_t flags,
|
|||
whole_item.append(in, length_preceding_braces);
|
||||
whole_item.append(item_begin, item_len);
|
||||
whole_item.append(brace_end + 1);
|
||||
debug(0, L"Found brace item: %ls\n", whole_item.c_str());
|
||||
auto whole_item2 = trim(whole_item);
|
||||
debug(0, L"Found brace item: %ls\n", whole_item2.c_str());
|
||||
expand_braces(whole_item, flags, out, errors);
|
||||
|
||||
item_begin = pos + 1;
|
||||
|
|
|
@ -668,35 +668,10 @@ void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &sta
|
|||
}
|
||||
|
||||
/// Reports the tokenizer error carried by \p tok as a parse error.
/// The error is forwarded to parse_error_at_location() together with the token's offset and
/// length, the error position (tok.offset + tok.error_offset), a parse error code and a message
/// formatted through L"%ls".
/// \param tokenizer not referenced anywhere in the visible body.
/// \param tok the error token; its `error` member selects both the code and the message.
// NOTE(review): this listing appears to overlay two variants of the body without +/- markers:
// a switch that maps tok.error to a parse error code, and a direct read of
// tok.error->parser_error. Both declare `parse_error_code`, so only one of them can be live in
// any real revision - confirm against the upstream file before editing.
void parse_ll_t::report_tokenizer_error(const tokenizer_t &tokenizer, const tok_t &tok) {
|
||||
// Variant 1: derive the parse error code from the tokenizer error via a switch.
parse_error_code_t parse_error_code;
|
||||
switch (tok.error) {
|
||||
case TOK_UNTERMINATED_QUOTE: {
|
||||
parse_error_code = parse_error_tokenizer_unterminated_quote;
|
||||
break;
|
||||
}
|
||||
case TOK_UNTERMINATED_SUBSHELL: {
|
||||
parse_error_code = parse_error_tokenizer_unterminated_subshell;
|
||||
break;
|
||||
}
|
||||
case TOK_UNTERMINATED_SLICE: {
|
||||
parse_error_code = parse_error_tokenizer_unterminated_slice;
|
||||
break;
|
||||
}
|
||||
case TOK_UNTERMINATED_ESCAPE: {
|
||||
parse_error_code = parse_error_tokenizer_unterminated_escape;
|
||||
break;
|
||||
}
|
||||
// Every remaining error, including the two named here, maps to the generic code.
case TOK_INVALID_REDIRECT:
|
||||
case TOK_INVALID_PIPE:
|
||||
default: {
|
||||
parse_error_code = parse_error_tokenizer_other;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Variant 2: the error object itself carries the associated parse error code.
parse_error_code_t parse_error_code = tok.error->parser_error;
|
||||
this->parse_error_at_location(tok.offset, tok.length, tok.offset + tok.error_offset,
|
||||
parse_error_code, L"%ls",
|
||||
// Message argument, variant 1: looked up from the error code.
error_message_for_code(tok.error).c_str());
|
||||
// Message argument, variant 2: read directly from the error object.
tok.error->Message);
|
||||
}
|
||||
|
||||
void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) {
|
||||
|
|
|
@ -16,56 +16,22 @@
|
|||
#include "tokenizer.h"
|
||||
#include "wutil.h" // IWYU pragma: keep
|
||||
|
||||
/// Error string for unexpected end of string.
|
||||
#define QUOTE_ERROR _(L"Unexpected end of string, quotes are not balanced")
|
||||
|
||||
/// Error string for mismatched parentheses.
|
||||
#define PARAN_ERROR _(L"Unexpected end of string, parenthesis do not match")
|
||||
|
||||
/// Error string for mismatched square brackets.
|
||||
#define SQUARE_BRACKET_ERROR _(L"Unexpected end of string, square brackets do not match")
|
||||
|
||||
/// Error string for unterminated escape (backslash without continuation).
|
||||
#define UNTERMINATED_ESCAPE_ERROR _(L"Unexpected end of string, incomplete escape sequence")
|
||||
|
||||
/// Error string for invalid redirections.
|
||||
#define REDIRECT_ERROR _(L"Invalid input/output redirection")
|
||||
|
||||
/// Error string for when trying to pipe from fd 0.
|
||||
#define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
|
||||
|
||||
/// Error for when ) is encountered with no matching (
|
||||
#define ERROR_CLOSING_UNOPENED_PARENTHESIS _(L"Unexpected ')' for unopened parenthesis")
|
||||
|
||||
/// Error for when [ is encountered while already in bracket mode
|
||||
#define ERROR_UNEXPECTED_BRACKET _(L"Unexpected '[' at this location")
|
||||
|
||||
/// Returns the message macro that corresponds to the tokenizer error \p err.
/// Codes without a dedicated message (TOK_ERROR_NONE among them) trip the assertion; if
/// assertions are compiled out an empty string is returned instead.
wcstring error_message_for_code(tokenizer_error err) {
    // Unterminated constructs.
    if (err == TOK_UNTERMINATED_QUOTE) return QUOTE_ERROR;
    if (err == TOK_UNTERMINATED_SUBSHELL) return PARAN_ERROR;
    if (err == TOK_UNTERMINATED_SLICE) return SQUARE_BRACKET_ERROR;
    if (err == TOK_UNTERMINATED_ESCAPE) return UNTERMINATED_ESCAPE_ERROR;

    // Invalid redirections and pipes.
    if (err == TOK_INVALID_REDIRECT) return REDIRECT_ERROR;
    if (err == TOK_INVALID_PIPE) return PIPE_ERROR;

    // Stray or misplaced delimiters.
    if (err == TOK_CLOSING_UNOPENED_SUBSHELL) return ERROR_CLOSING_UNOPENED_PARENTHESIS;
    if (err == TOK_ILLEGAL_SLICE) return ERROR_UNEXPECTED_BRACKET;

    assert(0 && "Unknown error type");
    return {};
}
|
||||
// Definitions of the tokenizer error objects. Each one is allocated once, here, and is referred
// to through its pointer afterwards (call_error() compares against TOK_ERROR_NONE by pointer).
// Entries that pass no parse error code use the constructor's default.
tokenizer_error *TOK_ERROR_NONE = new tokenizer_error{L""};

// Input ended while a construct was still open.
tokenizer_error *TOK_UNTERMINATED_QUOTE = new tokenizer_error{
    L"Unexpected end of string, quotes are not balanced", parse_error_tokenizer_unterminated_quote};
tokenizer_error *TOK_UNTERMINATED_SUBSHELL = new tokenizer_error{
    L"Unexpected end of string, expecting ')'", parse_error_tokenizer_unterminated_subshell};
tokenizer_error *TOK_UNTERMINATED_SLICE = new tokenizer_error{
    L"Unexpected end of string, square brackets do not match",
    parse_error_tokenizer_unterminated_slice};
tokenizer_error *TOK_UNTERMINATED_ESCAPE = new tokenizer_error{
    L"Unexpected end of string, incomplete escape sequence",
    parse_error_tokenizer_unterminated_escape};

// Redirection and pipe errors.
tokenizer_error *TOK_INVALID_REDIRECT = new tokenizer_error{L"Invalid input/output redirection"};
tokenizer_error *TOK_INVALID_PIPE =
    new tokenizer_error{L"Cannot use stdin (fd 0) as pipe output"};

// Parenthesis and bracket errors.
tokenizer_error *TOK_CLOSING_UNOPENED_SUBSHELL =
    new tokenizer_error{L"Unexpected ')' for unopened parenthesis"};
tokenizer_error *TOK_ILLEGAL_SLICE = new tokenizer_error{L"Unexpected '[' at this location"};

// Brace errors.
tokenizer_error *TOK_CLOSING_UNOPENED_BRACE =
    new tokenizer_error{L"Unexpected '}' for unopened brace expansion"};
tokenizer_error *TOK_UNTERMINATED_BRACE =
    new tokenizer_error{L"Unexpected end of string, incomplete parameter expansion"};
tokenizer_error *TOK_EXPECTED_PCLOSE_FOUND_BCLOSE =
    new tokenizer_error{L"Unexpected '}' found, expecting ')'"};
tokenizer_error *TOK_EXPECTED_BCLOSE_FOUND_PCLOSE =
    new tokenizer_error{L"Unexpected ')' found, expecting '}'"};
|
||||
|
||||
/// Return an error token and mark that we no longer have a next token.
|
||||
tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *token_start,
|
||||
tok_t tokenizer_t::call_error(tokenizer_error *error_type, const wchar_t *token_start,
|
||||
const wchar_t *error_loc) {
|
||||
assert(error_type != TOK_ERROR_NONE && "TOK_ERROR_NONE passed to call_error");
|
||||
assert(error_loc >= token_start && "Invalid error location");
|
||||
|
@ -143,6 +109,7 @@ ENUM_FLAGS(tok_mode) {
|
|||
tok_t tokenizer_t::read_string() {
|
||||
tok_mode mode { tok_mode::regular_text };
|
||||
std::vector<int> paran_offsets;
|
||||
std::vector<char> expecting;
|
||||
int slice_offset = 0;
|
||||
const wchar_t *const buff_start = this->buff;
|
||||
bool is_first = true;
|
||||
|
@ -175,9 +142,18 @@ tok_t tokenizer_t::read_string() {
|
|||
}
|
||||
else if (c == L'(') {
|
||||
paran_offsets.push_back(this->buff - this->start);
|
||||
expecting.push_back(L')');
|
||||
mode |= tok_mode::subshell;
|
||||
}
|
||||
else if (c == L'{') {
|
||||
paran_offsets.push_back(this->buff - this->start);
|
||||
expecting.push_back(L'}');
|
||||
mode |= tok_mode::curly_braces;
|
||||
}
|
||||
else if (c == L')') {
|
||||
if (expecting.size() > 0 && expecting.back() == L'}') {
|
||||
return this->call_error(TOK_EXPECTED_BCLOSE_FOUND_PCLOSE, this->start, this->buff);
|
||||
}
|
||||
switch (paran_offsets.size()) {
|
||||
case 0:
|
||||
return this->call_error(TOK_CLOSING_UNOPENED_SUBSHELL, this->start, this->buff);
|
||||
|
@ -187,6 +163,19 @@ tok_t tokenizer_t::read_string() {
|
|||
paran_offsets.pop_back();
|
||||
}
|
||||
}
|
||||
else if (c == L'}') {
|
||||
if (expecting.size() > 0 && expecting.back() == L')') {
|
||||
return this->call_error(TOK_EXPECTED_PCLOSE_FOUND_BCLOSE, this->start, this->buff);
|
||||
}
|
||||
switch (paran_offsets.size()) {
|
||||
case 0:
|
||||
return this->call_error(TOK_CLOSING_UNOPENED_BRACE, this->start, this->buff);
|
||||
case 1:
|
||||
mode &= ~(tok_mode::curly_braces);
|
||||
default:
|
||||
paran_offsets.pop_back();
|
||||
}
|
||||
}
|
||||
else if (c == L'[') {
|
||||
if (this->buff != buff_start) {
|
||||
if ((mode & tok_mode::array_brackets) == tok_mode::array_brackets) {
|
||||
|
@ -257,6 +246,13 @@ tok_t tokenizer_t::read_string() {
|
|||
error = this->call_error(TOK_UNTERMINATED_SUBSHELL, buff_start,
|
||||
this->start + offset_of_open_paran);
|
||||
}
|
||||
else if ((mode & tok_mode::curly_braces) == tok_mode::curly_braces) {
|
||||
assert(paran_offsets.size() > 0);
|
||||
size_t offset_of_open_brace = paran_offsets.back();
|
||||
|
||||
error = this->call_error(TOK_UNTERMINATED_BRACE, buff_start,
|
||||
this->start + offset_of_open_brace);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "common.h"
|
||||
#include "maybe.h"
|
||||
#include "parse_constants.h"
|
||||
|
||||
/// Token types.
|
||||
enum token_type {
|
||||
|
@ -22,19 +23,26 @@ enum token_type {
|
|||
TOK_COMMENT /// comment token
|
||||
};
|
||||
|
||||
/// Tokenizer error types.
|
||||
enum tokenizer_error {
|
||||
TOK_ERROR_NONE,
|
||||
TOK_UNTERMINATED_QUOTE,
|
||||
TOK_UNTERMINATED_SUBSHELL,
|
||||
TOK_UNTERMINATED_SLICE,
|
||||
TOK_UNTERMINATED_ESCAPE,
|
||||
TOK_INVALID_REDIRECT,
|
||||
TOK_INVALID_PIPE,
|
||||
TOK_CLOSING_UNOPENED_SUBSHELL,
|
||||
TOK_ILLEGAL_SLICE,
|
||||
struct tokenizer_error {
|
||||
const wchar_t *Message;
|
||||
enum parse_error_code_t parser_error; //the parser error associated with this tokenizer error
|
||||
tokenizer_error(const wchar_t *msg, enum parse_error_code_t perr = parse_error_tokenizer_other)
|
||||
: Message(msg), parser_error(perr) {}
|
||||
tokenizer_error(const tokenizer_error&) = delete;
|
||||
};
|
||||
|
||||
/// Tokenizer error objects. TOK_ERROR_NONE means "no error" and is the default value of
/// tok_t::error; the others identify a specific failure.
extern tokenizer_error *TOK_ERROR_NONE;
extern tokenizer_error *TOK_UNTERMINATED_QUOTE;
extern tokenizer_error *TOK_UNTERMINATED_SUBSHELL;
extern tokenizer_error *TOK_UNTERMINATED_SLICE;
extern tokenizer_error *TOK_UNTERMINATED_ESCAPE;
extern tokenizer_error *TOK_UNTERMINATED_BRACE;
extern tokenizer_error *TOK_INVALID_REDIRECT;
extern tokenizer_error *TOK_INVALID_PIPE;
extern tokenizer_error *TOK_CLOSING_UNOPENED_SUBSHELL;
extern tokenizer_error *TOK_CLOSING_UNOPENED_BRACE;
extern tokenizer_error *TOK_ILLEGAL_SLICE;
// These two are defined and returned by the tokenizer but previously had no declaration here,
// so code outside the tokenizer's own source file could not name them.
extern tokenizer_error *TOK_EXPECTED_PCLOSE_FOUND_BCLOSE;
extern tokenizer_error *TOK_EXPECTED_BCLOSE_FOUND_PCLOSE;
|
||||
|
||||
enum class redirection_type_t {
|
||||
overwrite, // normal redirection: > file.txt
|
||||
append, // appending redirection: >> file.txt
|
||||
|
@ -69,7 +77,7 @@ struct tok_t {
|
|||
maybe_t<int> redirected_fd{};
|
||||
|
||||
// If an error, this is the error code.
|
||||
enum tokenizer_error error { TOK_ERROR_NONE };
|
||||
tokenizer_error *error { TOK_ERROR_NONE };
|
||||
|
||||
// If an error, this is the offset of the error within the token. A value of 0 means it occurred
|
||||
// at 'offset'.
|
||||
|
@ -99,7 +107,7 @@ class tokenizer_t {
|
|||
/// Whether to continue the previous line after the comment.
|
||||
bool continue_line_after_comment{false};
|
||||
|
||||
tok_t call_error(enum tokenizer_error error_type, const wchar_t *token_start,
|
||||
tok_t call_error(tokenizer_error *error_type, const wchar_t *token_start,
|
||||
const wchar_t *error_loc);
|
||||
tok_t read_string();
|
||||
maybe_t<tok_t> tok_next();
|
||||
|
|
|
@ -45,3 +45,21 @@ wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype) {
|
|||
output.push_back(ellipsis_char);
|
||||
return output;
|
||||
}
|
||||
|
||||
/// Returns a copy of \p input with leading and trailing characters that appear in `whitespace`
/// removed. Characters in the interior of the string are left untouched.
/// \return the trimmed string; empty when \p input is empty or consists only of such characters.
wcstring trim(const wcstring &input) {
    const auto first = input.find_first_not_of(whitespace);
    if (first == wcstring::npos) {
        // Nothing but whitespace (or nothing at all).
        return wcstring{};
    }

    // At least one non-whitespace character exists, so this search cannot fail and last >= first.
    const auto last = input.find_last_not_of(whitespace);
    return input.substr(first, last - first + 1);
}
|
||||
|
|
|
@ -59,5 +59,6 @@ enum class ellipsis_type {
|
|||
};
|
||||
|
||||
wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype = ellipsis_type::Prettiest);
|
||||
/// Returns a copy of \p input with leading and trailing characters that appear in `whitespace`
/// removed; the result is empty when nothing else remains.
wcstring trim(const wcstring &input);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue