Make { and } valid, first-class tokenizer elements

Mahmoud Al-Qudsi 2018-03-11 19:36:10 -05:00
parent 7447432471
commit 00f95a978e
6 changed files with 89 additions and 89 deletions

src/expand.cpp

@@ -47,6 +47,7 @@
#include "proc.h"
#include "reader.h"
#include "wildcard.h"
#include "wcstringutil.h"
#include "wutil.h" // IWYU pragma: keep
#ifdef KERN_PROCARGS2
#else
@@ -941,7 +942,8 @@ static expand_error_t expand_braces(const wcstring &instr, expand_flags_t flags,
whole_item.append(in, length_preceding_braces);
whole_item.append(item_begin, item_len);
whole_item.append(brace_end + 1);
debug(0, L"Found brace item: %ls\n", whole_item.c_str());
auto whole_item2 = trim(whole_item);
debug(0, L"Found brace item: %ls\n", whole_item2.c_str());
expand_braces(whole_item, flags, out, errors);
item_begin = pos + 1;
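
The hunk above rebuilds each comma-separated brace alternative as the text before '{', the alternative itself, and the text after '}', then logs a whitespace-trimmed copy next to the raw string; only the untrimmed string is recursively expanded at this point. A standalone sketch of that reassembly, with hypothetical pieces rather than the expander's actual pointers:

    // Standalone illustration, not fish's expand_braces(): reassemble one
    // brace alternative of the token "{ foo , bar }". The pieces below are
    // hypothetical stand-ins for the expander's state.
    #include <cwchar>
    #include <string>

    int main() {
        std::wstring prefix;            // text preceding '{' (empty here)
        std::wstring item = L" foo ";   // one comma-separated alternative
        std::wstring suffix;            // text following '}' (empty here)
        std::wstring whole_item = prefix + item + suffix;  // " foo "
        // The hunk above logs both this raw string and a trim()ed copy ("foo").
        wprintf(L"whole_item: '%ls'\n", whole_item.c_str());
        return 0;
    }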

src/parse_tree.cpp

@@ -668,35 +668,10 @@ void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &sta
}
void parse_ll_t::report_tokenizer_error(const tokenizer_t &tokenizer, const tok_t &tok) {
parse_error_code_t parse_error_code;
switch (tok.error) {
case TOK_UNTERMINATED_QUOTE: {
parse_error_code = parse_error_tokenizer_unterminated_quote;
break;
}
case TOK_UNTERMINATED_SUBSHELL: {
parse_error_code = parse_error_tokenizer_unterminated_subshell;
break;
}
case TOK_UNTERMINATED_SLICE: {
parse_error_code = parse_error_tokenizer_unterminated_slice;
break;
}
case TOK_UNTERMINATED_ESCAPE: {
parse_error_code = parse_error_tokenizer_unterminated_escape;
break;
}
case TOK_INVALID_REDIRECT:
case TOK_INVALID_PIPE:
default: {
parse_error_code = parse_error_tokenizer_other;
break;
}
}
parse_error_code_t parse_error_code = tok.error->parser_error;
this->parse_error_at_location(tok.offset, tok.length, tok.offset + tok.error_offset,
parse_error_code, L"%ls",
error_message_for_code(tok.error).c_str());
tok.error->Message);
}
void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token) {

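The switch removed above existed only to translate a tokenizer error value into a parse_error_code_t. With the struct introduced later in this commit, each error object carries that code and its message itself, so report_tokenizer_error() shrinks to a pair of field reads. A minimal standalone sketch of the pattern, using hypothetical stand-in types rather than fish's:

    // The error descriptor carries its downstream code and message, so
    // reporting is a field read instead of a switch. toy_* names are
    // hypothetical stand-ins, not fish types.
    #include <cwchar>

    enum toy_parse_code { toy_parse_other, toy_parse_unterminated_quote };

    struct toy_tok_error {
        const wchar_t *message;
        toy_parse_code code;
    };

    // Mirrors the extern tokenizer_error pointers declared in tokenizer.h below.
    static const toy_tok_error unterminated_quote{
        L"Unexpected end of string, quotes are not balanced",
        toy_parse_unterminated_quote};

    static void report(const toy_tok_error *err) {
        // No per-error switch: the descriptor already knows its mapping.
        wprintf(L"parse code %d: %ls\n", (int)err->code, err->message);
    }

    int main() {
        report(&unterminated_quote);
        return 0;
    }
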
src/tokenizer.cpp

@@ -16,56 +16,22 @@
#include "tokenizer.h"
#include "wutil.h" // IWYU pragma: keep
/// Error string for unexpected end of string.
#define QUOTE_ERROR _(L"Unexpected end of string, quotes are not balanced")
/// Error string for mismatched parenthesis.
#define PARAN_ERROR _(L"Unexpected end of string, parenthesis do not match")
/// Error string for mismatched square brackets.
#define SQUARE_BRACKET_ERROR _(L"Unexpected end of string, square brackets do not match")
/// Error string for unterminated escape (backslash without continuation).
#define UNTERMINATED_ESCAPE_ERROR _(L"Unexpected end of string, incomplete escape sequence")
/// Error string for invalid redirections.
#define REDIRECT_ERROR _(L"Invalid input/output redirection")
/// Error string for when trying to pipe from fd 0.
#define PIPE_ERROR _(L"Cannot use stdin (fd 0) as pipe output")
/// Error for when ) is encountered with no matching (
#define ERROR_CLOSING_UNOPENED_PARENTHESIS _(L"Unexpected ')' for unopened parenthesis")
/// Error for when [ is encountered while already in bracket mode
#define ERROR_UNEXPECTED_BRACKET _(L"Unexpected '[' at this location")
wcstring error_message_for_code(tokenizer_error err) {
switch (err) {
case TOK_UNTERMINATED_QUOTE:
return QUOTE_ERROR;
case TOK_UNTERMINATED_SUBSHELL:
return PARAN_ERROR;
case TOK_UNTERMINATED_SLICE:
return SQUARE_BRACKET_ERROR;
case TOK_UNTERMINATED_ESCAPE:
return UNTERMINATED_ESCAPE_ERROR;
case TOK_INVALID_REDIRECT:
return REDIRECT_ERROR;
case TOK_INVALID_PIPE:
return PIPE_ERROR;
case TOK_CLOSING_UNOPENED_SUBSHELL:
return ERROR_CLOSING_UNOPENED_PARENTHESIS;
case TOK_ILLEGAL_SLICE:
return ERROR_UNEXPECTED_BRACKET;
default:
assert(0 && "Unknown error type");
return {};
}
}
tokenizer_error *TOK_ERROR_NONE = new tokenizer_error(L"");
tokenizer_error *TOK_UNTERMINATED_QUOTE = new tokenizer_error((L"Unexpected end of string, quotes are not balanced"), parse_error_tokenizer_unterminated_quote);
tokenizer_error *TOK_UNTERMINATED_SUBSHELL = new tokenizer_error((L"Unexpected end of string, expecting ')'"), parse_error_tokenizer_unterminated_subshell);
tokenizer_error *TOK_UNTERMINATED_SLICE = new tokenizer_error((L"Unexpected end of string, square brackets do not match"), parse_error_tokenizer_unterminated_slice);
tokenizer_error *TOK_UNTERMINATED_ESCAPE = new tokenizer_error((L"Unexpected end of string, incomplete escape sequence"), parse_error_tokenizer_unterminated_escape);
tokenizer_error *TOK_INVALID_REDIRECT = new tokenizer_error((L"Invalid input/output redirection"));
tokenizer_error *TOK_INVALID_PIPE = new tokenizer_error((L"Cannot use stdin (fd 0) as pipe output"));
tokenizer_error *TOK_CLOSING_UNOPENED_SUBSHELL = new tokenizer_error((L"Unexpected ')' for unopened parenthesis"));
tokenizer_error *TOK_ILLEGAL_SLICE = new tokenizer_error((L"Unexpected '[' at this location"));
tokenizer_error *TOK_CLOSING_UNOPENED_BRACE = new tokenizer_error((L"Unexpected '}' for unopened brace expansion"));
tokenizer_error *TOK_UNTERMINATED_BRACE = new tokenizer_error((L"Unexpected end of string, incomplete parameter expansion"));
tokenizer_error *TOK_EXPECTED_PCLOSE_FOUND_BCLOSE = new tokenizer_error((L"Unexpected '}' found, expecting ')'"));
tokenizer_error *TOK_EXPECTED_BCLOSE_FOUND_PCLOSE = new tokenizer_error((L"Unexpected ')' found, expecting '}'"));
/// Return an error token and mark that we no longer have a next token.
tok_t tokenizer_t::call_error(enum tokenizer_error error_type, const wchar_t *token_start,
tok_t tokenizer_t::call_error(tokenizer_error *error_type, const wchar_t *token_start,
const wchar_t *error_loc) {
assert(error_type != TOK_ERROR_NONE && "TOK_ERROR_NONE passed to call_error");
assert(error_loc >= token_start && "Invalid error location");
@@ -143,6 +109,7 @@ ENUM_FLAGS(tok_mode) {
tok_t tokenizer_t::read_string() {
tok_mode mode { tok_mode::regular_text };
std::vector<int> paran_offsets;
std::vector<char> expecting;
int slice_offset = 0;
const wchar_t *const buff_start = this->buff;
bool is_first = true;
@@ -175,9 +142,18 @@ tok_t tokenizer_t::read_string() {
}
else if (c == L'(') {
paran_offsets.push_back(this->buff - this->start);
expecting.push_back(L')');
mode |= tok_mode::subshell;
}
else if (c == L'{') {
paran_offsets.push_back(this->buff - this->start);
expecting.push_back(L'}');
mode |= tok_mode::curly_braces;
}
else if (c == L')') {
if (expecting.size() > 0 && expecting.back() == L'}') {
return this->call_error(TOK_EXPECTED_BCLOSE_FOUND_PCLOSE, this->start, this->buff);
}
switch (paran_offsets.size()) {
case 0:
return this->call_error(TOK_CLOSING_UNOPENED_SUBSHELL, this->start, this->buff);
@@ -187,6 +163,19 @@ tok_t tokenizer_t::read_string() {
paran_offsets.pop_back();
}
}
else if (c == L'}') {
if (expecting.size() > 0 && expecting.back() == L')') {
return this->call_error(TOK_EXPECTED_PCLOSE_FOUND_BCLOSE, this->start, this->buff);
}
switch (paran_offsets.size()) {
case 0:
return this->call_error(TOK_CLOSING_UNOPENED_BRACE, this->start, this->buff);
case 1:
mode &= ~(tok_mode::curly_braces);
default:
paran_offsets.pop_back();
}
}
else if (c == L'[') {
if (this->buff != buff_start) {
if ((mode & tok_mode::array_brackets) == tok_mode::array_brackets) {
@@ -257,6 +246,13 @@ tok_t tokenizer_t::read_string() {
error = this->call_error(TOK_UNTERMINATED_SUBSHELL, buff_start,
this->start + offset_of_open_paran);
}
else if ((mode & tok_mode::curly_braces) == tok_mode::curly_braces) {
assert(paran_offsets.size() > 0);
size_t offset_of_open_brace = paran_offsets.back();
error = this->call_error(TOK_UNTERMINATED_BRACE, buff_start,
this->start + offset_of_open_brace);
}
return error;
}
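
The brace support above reuses the subshell bookkeeping: each '(' or '{' records its offset and pushes the closer it now expects, every ')' or '}' is checked against the top of that stack, so a ')' inside an open brace (or a '}' inside an open parenthesis) is reported as a mismatch, and an unclosed opener is reported at end of string with the offset of the opening character. A standalone sketch of that bookkeeping, separate from fish's tokenizer and with hypothetical messages:

    // Expected-closer stack, as used by read_string() above; not fish code.
    // Returns nullptr on success, or a hypothetical message for the first error.
    #include <cwchar>
    #include <vector>

    static const wchar_t *check_delims(const wchar_t *s) {
        std::vector<wchar_t> expecting;  // closer expected for each open delimiter
        for (; *s; ++s) {
            if (*s == L'(') expecting.push_back(L')');
            else if (*s == L'{') expecting.push_back(L'}');
            else if (*s == L')' || *s == L'}') {
                if (expecting.empty())
                    return *s == L')' ? L"unexpected ')' for unopened parenthesis"
                                      : L"unexpected '}' for unopened brace";
                if (expecting.back() != *s)
                    return expecting.back() == L')' ? L"expected ')', found '}'"
                                                    : L"expected '}', found ')'";
                expecting.pop_back();
            }
        }
        return expecting.empty() ? nullptr : L"unterminated '(' or '{'";
    }

    int main() {
        const wchar_t *inputs[] = {L"echo (a{b,c})", L"echo (a}", L"echo {a,b"};
        for (const wchar_t *in : inputs) {
            const wchar_t *err = check_delims(in);
            wprintf(L"%ls -> %ls\n", in, err ? err : L"ok");
        }
        return 0;
    }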

src/tokenizer.h

@@ -7,6 +7,7 @@
#include "common.h"
#include "maybe.h"
#include "parse_constants.h"
/// Token types.
enum token_type {
@@ -22,19 +23,26 @@ enum token_type {
TOK_COMMENT /// comment token
};
/// Tokenizer error types.
enum tokenizer_error {
TOK_ERROR_NONE,
TOK_UNTERMINATED_QUOTE,
TOK_UNTERMINATED_SUBSHELL,
TOK_UNTERMINATED_SLICE,
TOK_UNTERMINATED_ESCAPE,
TOK_INVALID_REDIRECT,
TOK_INVALID_PIPE,
TOK_CLOSING_UNOPENED_SUBSHELL,
TOK_ILLEGAL_SLICE,
struct tokenizer_error {
const wchar_t *Message;
enum parse_error_code_t parser_error;  // The parser error associated with this tokenizer error
tokenizer_error(const wchar_t *msg, enum parse_error_code_t perr = parse_error_tokenizer_other)
: Message(msg), parser_error(perr) {}
tokenizer_error(const tokenizer_error&) = delete;
};
extern tokenizer_error *TOK_ERROR_NONE;
extern tokenizer_error *TOK_UNTERMINATED_QUOTE;
extern tokenizer_error *TOK_UNTERMINATED_SUBSHELL;
extern tokenizer_error *TOK_UNTERMINATED_SLICE;
extern tokenizer_error *TOK_UNTERMINATED_ESCAPE;
extern tokenizer_error *TOK_UNTERMINATED_BRACE;
extern tokenizer_error *TOK_INVALID_REDIRECT;
extern tokenizer_error *TOK_INVALID_PIPE;
extern tokenizer_error *TOK_CLOSING_UNOPENED_SUBSHELL;
extern tokenizer_error *TOK_CLOSING_UNOPENED_BRACE;
extern tokenizer_error *TOK_ILLEGAL_SLICE;
enum class redirection_type_t {
overwrite, // normal redirection: > file.txt
append, // appending redirection: >> file.txt
@@ -69,7 +77,7 @@ struct tok_t {
maybe_t<int> redirected_fd{};
// If an error, this is the error code.
enum tokenizer_error error { TOK_ERROR_NONE };
tokenizer_error *error { TOK_ERROR_NONE };
// If an error, this is the offset of the error within the token. A value of 0 means it occurred
// at 'offset'.
@@ -99,7 +107,7 @@ class tokenizer_t {
/// Whether to continue the previous line after the comment.
bool continue_line_after_comment{false};
tok_t call_error(enum tokenizer_error error_type, const wchar_t *token_start,
tok_t call_error(tokenizer_error *error_type, const wchar_t *token_start,
const wchar_t *error_loc);
tok_t read_string();
maybe_t<tok_t> tok_next();
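
tok_t::error is now a pointer whose default member initializer is TOK_ERROR_NONE, so "no error" is a specific shared object rather than enum value 0, and the deleted copy constructor keeps each error a single, identity-comparable instance. A small standalone sketch of that convention (toy_* names are stand-ins, not fish's types):

    // Pointer-singleton convention used by the new tok_t::error field.
    #include <cwchar>

    struct toy_error {
        const wchar_t *message;
        explicit toy_error(const wchar_t *m) : message(m) {}
        toy_error(const toy_error &) = delete;  // one instance per error kind
    };

    static toy_error *const ERR_NONE = new toy_error(L"");
    static toy_error *const ERR_UNTERMINATED_BRACE =
        new toy_error(L"Unexpected end of string, incomplete parameter expansion");

    struct toy_tok {
        toy_error *error{ERR_NONE};  // same shape as tok_t's default above
    };

    int main() {
        toy_tok good, bad;
        bad.error = ERR_UNTERMINATED_BRACE;
        toy_tok *toks[] = {&good, &bad};
        for (const toy_tok *t : toks) {
            if (t->error == ERR_NONE)  // identity check, not value check
                wprintf(L"token ok\n");
            else
                wprintf(L"tokenizer error: %ls\n", t->error->message);
        }
        return 0;
    }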

src/wcstringutil.cpp

@@ -45,3 +45,21 @@ wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype) {
output.push_back(ellipsis_char);
return output;
}
wcstring trim(const wcstring &input) {
debug(0, "trimming '%ls'", input.c_str());
// auto begin = input.cbegin();
// for (begin; *begin == L' '; ++begin);
// auto end = input.cbegin() + input.size();
// for (end; end > begin && *end == L' '; ++end);
auto begin_offset = input.find_first_not_of(whitespace);
if (begin_offset == wcstring::npos) {
return wcstring{};
}
auto end = input.cbegin() + input.find_last_not_of(whitespace);
wcstring result(input.begin() + begin_offset, end + 1);
return result;
}
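
trim() strips leading and trailing characters from the `whitespace` set, which is not shown in this hunk, so the sketch below substitutes a conventional set of its own to illustrate the intended behaviour:

    // Standalone illustration of trim()'s behaviour using the same
    // find_first_not_of / find_last_not_of approach; the whitespace set here
    // is an assumption, since the real `whitespace` constant is not in this hunk.
    #include <cwchar>
    #include <string>

    static std::wstring trim_copy(const std::wstring &input) {
        const wchar_t *ws = L" \t\r\n\v";  // assumed whitespace set
        size_t begin = input.find_first_not_of(ws);
        if (begin == std::wstring::npos) return L"";  // all-whitespace input
        size_t end = input.find_last_not_of(ws);
        return input.substr(begin, end - begin + 1);
    }

    int main() {
        wprintf(L"'%ls'\n", trim_copy(L"  foo bar \t").c_str());  // 'foo bar'
        wprintf(L"'%ls'\n", trim_copy(L"   ").c_str());           // ''
        wprintf(L"'%ls'\n", trim_copy(L"baz").c_str());           // 'baz'
        return 0;
    }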

src/wcstringutil.h

@@ -59,5 +59,6 @@ enum class ellipsis_type {
};
wcstring truncate(const wcstring &input, int max_len, ellipsis_type etype = ellipsis_type::Prettiest);
wcstring trim(const wcstring &input);
#endif