mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-28 04:35:09 +00:00
Continue migration to the new tokenizer interface
This commit is contained in:
parent
618896c043
commit
4ebaa7b6bd
8 changed files with 80 additions and 159 deletions
|
@ -196,17 +196,18 @@ static void write_part(const wchar_t *begin,
|
||||||
// fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end );
|
// fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end );
|
||||||
wcstring out;
|
wcstring out;
|
||||||
tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED);
|
tokenizer_t tok(buff, TOK_ACCEPT_UNFINISHED);
|
||||||
for (; tok_has_next(&tok); tok_next(&tok))
|
tok_t token;
|
||||||
|
while (tok.next(&token))
|
||||||
{
|
{
|
||||||
if ((cut_at_cursor) &&
|
if ((cut_at_cursor) &&
|
||||||
(tok_get_pos(&tok)+wcslen(tok_last(&tok)) >= pos))
|
(token.offset + token.text.size() >= pos))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
switch (tok_last_type(&tok))
|
switch (token.type)
|
||||||
{
|
{
|
||||||
case TOK_STRING:
|
case TOK_STRING:
|
||||||
{
|
{
|
||||||
wcstring tmp = tok_last(&tok);
|
wcstring tmp = token.text;
|
||||||
unescape_string_in_place(&tmp, UNESCAPE_INCOMPLETE);
|
unescape_string_in_place(&tmp, UNESCAPE_INCOMPLETE);
|
||||||
out.append(tmp);
|
out.append(tmp);
|
||||||
out.push_back(L'\n');
|
out.push_back(L'\n');
|
||||||
|
|
|
@ -435,34 +435,7 @@ static void test_convert_nulls(void)
|
||||||
*/
|
*/
|
||||||
static void test_tok()
|
static void test_tok()
|
||||||
{
|
{
|
||||||
|
|
||||||
say(L"Testing tokenizer");
|
say(L"Testing tokenizer");
|
||||||
|
|
||||||
|
|
||||||
say(L"Testing invalid input");
|
|
||||||
tokenizer_t t(NULL, 0);
|
|
||||||
|
|
||||||
if (tok_last_type(&t) != TOK_ERROR)
|
|
||||||
{
|
|
||||||
err(L"Invalid input to tokenizer was undetected");
|
|
||||||
}
|
|
||||||
|
|
||||||
say(L"Testing use of broken tokenizer");
|
|
||||||
if (!tok_has_next(&t))
|
|
||||||
{
|
|
||||||
err(L"tok_has_next() should return 1 once on broken tokenizer");
|
|
||||||
}
|
|
||||||
|
|
||||||
tok_next(&t);
|
|
||||||
if (tok_last_type(&t) != TOK_ERROR)
|
|
||||||
{
|
|
||||||
err(L"Invalid input to tokenizer was undetected");
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
This should crash if there is a bug. No reliable way to detect otherwise.
|
|
||||||
*/
|
|
||||||
say(L"Test destruction of broken tokenizer");
|
|
||||||
{
|
{
|
||||||
|
|
||||||
const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect Compress_Newlines\n \n\t\n \nInto_Just_One";
|
const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect Compress_Newlines\n \n\t\n \nInto_Just_One";
|
||||||
|
|
|
@ -632,7 +632,7 @@ public:
|
||||||
void accept_tokens(parse_token_t token1, parse_token_t token2);
|
void accept_tokens(parse_token_t token1, parse_token_t token2);
|
||||||
|
|
||||||
/* Report tokenizer errors */
|
/* Report tokenizer errors */
|
||||||
void report_tokenizer_error(parse_token_t token, int tok_err, const wchar_t *tok_error);
|
void report_tokenizer_error(parse_token_t token, int tok_err, const wcstring &tok_error);
|
||||||
|
|
||||||
/* Indicate if we hit a fatal error */
|
/* Indicate if we hit a fatal error */
|
||||||
bool has_fatal_error(void) const
|
bool has_fatal_error(void) const
|
||||||
|
@ -887,9 +887,8 @@ void parse_ll_t::parse_error_failed_production(struct parse_stack_element_t &sta
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_ll_t::report_tokenizer_error(parse_token_t token, int tok_err_code, const wchar_t *tok_error)
|
void parse_ll_t::report_tokenizer_error(parse_token_t token, int tok_err_code, const wcstring &tok_error)
|
||||||
{
|
{
|
||||||
assert(tok_error != NULL);
|
|
||||||
parse_error_code_t parse_error_code;
|
parse_error_code_t parse_error_code;
|
||||||
switch (tok_err_code)
|
switch (tok_err_code)
|
||||||
{
|
{
|
||||||
|
@ -911,7 +910,7 @@ void parse_ll_t::report_tokenizer_error(parse_token_t token, int tok_err_code, c
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
this->parse_error(token, parse_error_code, L"%ls", tok_error);
|
this->parse_error(token, parse_error_code, L"%ls", tok_error.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token)
|
void parse_ll_t::parse_error_unexpected_token(const wchar_t *expected, parse_token_t token)
|
||||||
|
@ -1200,7 +1199,7 @@ static parse_keyword_t keyword_with_name(const wchar_t *name)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Given a token, returns the keyword it matches, or parse_keyword_none. */
|
/* Given a token, returns the keyword it matches, or parse_keyword_none. */
|
||||||
static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
|
static parse_keyword_t keyword_for_token(token_type tok, const wcstring &token)
|
||||||
{
|
{
|
||||||
/* Only strings can be keywords */
|
/* Only strings can be keywords */
|
||||||
if (tok != TOK_STRING)
|
if (tok != TOK_STRING)
|
||||||
|
@ -1211,6 +1210,7 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
|
||||||
/* If tok_txt is clean (which most are), we can compare it directly. Otherwise we have to expand it. We only expand quotes, and we don't want to do expensive expansions like tilde expansions. So we do our own "cleanliness" check; if we find a character not in our allowed set we know it's not a keyword, and if we never find a quote we don't have to expand! Note that this lowercase set could be shrunk to be just the characters that are in keywords. */
|
/* If tok_txt is clean (which most are), we can compare it directly. Otherwise we have to expand it. We only expand quotes, and we don't want to do expensive expansions like tilde expansions. So we do our own "cleanliness" check; if we find a character not in our allowed set we know it's not a keyword, and if we never find a quote we don't have to expand! Note that this lowercase set could be shrunk to be just the characters that are in keywords. */
|
||||||
parse_keyword_t result = parse_keyword_none;
|
parse_keyword_t result = parse_keyword_none;
|
||||||
bool needs_expand = false, all_chars_valid = true;
|
bool needs_expand = false, all_chars_valid = true;
|
||||||
|
const wchar_t *tok_txt = token.c_str();
|
||||||
const wchar_t *chars_allowed_in_keywords = L"abcdefghijklmnopqrstuvwxyz'\"";
|
const wchar_t *chars_allowed_in_keywords = L"abcdefghijklmnopqrstuvwxyz'\"";
|
||||||
for (size_t i=0; tok_txt[i] != L'\0'; i++)
|
for (size_t i=0; tok_txt[i] != L'\0'; i++)
|
||||||
{
|
{
|
||||||
|
@ -1249,36 +1249,34 @@ static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_no
|
||||||
/* Terminal token */
|
/* Terminal token */
|
||||||
static const parse_token_t kTerminalToken = {parse_token_type_terminate, parse_keyword_none, false, false, SOURCE_OFFSET_INVALID, 0};
|
static const parse_token_t kTerminalToken = {parse_token_type_terminate, parse_keyword_none, false, false, SOURCE_OFFSET_INVALID, 0};
|
||||||
|
|
||||||
static inline bool is_help_argument(const wchar_t *txt)
|
static inline bool is_help_argument(const wcstring &txt)
|
||||||
{
|
{
|
||||||
return ! wcscmp(txt, L"-h") || ! wcscmp(txt, L"--help");
|
return contains(txt, L"-h", L"--help");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return a new parse token, advancing the tokenizer */
|
/* Return a new parse token, advancing the tokenizer */
|
||||||
static inline parse_token_t next_parse_token(tokenizer_t *tok)
|
static inline parse_token_t next_parse_token(tokenizer_t *tok, tok_t *token)
|
||||||
{
|
{
|
||||||
if (! tok_has_next(tok))
|
if (! tok->next(token))
|
||||||
{
|
{
|
||||||
return kTerminalToken;
|
return kTerminalToken;
|
||||||
}
|
}
|
||||||
|
|
||||||
token_type tok_type = static_cast<token_type>(tok_last_type(tok));
|
|
||||||
int tok_start = tok_get_pos(tok);
|
|
||||||
size_t tok_extent = tok_get_extent(tok);
|
|
||||||
assert(tok_extent < 10000000); //paranoia
|
|
||||||
const wchar_t *tok_txt = tok_last(tok);
|
|
||||||
|
|
||||||
parse_token_t result;
|
parse_token_t result;
|
||||||
|
|
||||||
/* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard and it even starts to look like a feature. */
|
/* Set the type, keyword, and whether there's a dash prefix. Note that this is quite sketchy, because it ignores quotes. This is the historical behavior. For example, `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. Squint at it really hard and it even starts to look like a feature. */
|
||||||
result.type = parse_token_type_from_tokenizer_token(tok_type);
|
result.type = parse_token_type_from_tokenizer_token(token->type);
|
||||||
result.keyword = keyword_for_token(tok_type, tok_txt);
|
result.keyword = keyword_for_token(token->type, token->text);
|
||||||
result.has_dash_prefix = (tok_txt[0] == L'-');
|
result.has_dash_prefix = !token->text.empty() && token->text.at(0) == L'-';
|
||||||
result.is_help_argument = result.has_dash_prefix && is_help_argument(tok_txt);
|
result.is_help_argument = result.has_dash_prefix && is_help_argument(token->text);
|
||||||
result.source_start = (source_offset_t)tok_start;
|
|
||||||
result.source_length = (source_offset_t)tok_extent;
|
/* These assertions are totally bogus. Basically our tokenizer works in size_t but we work in uint32_t to save some space. If we have a source file larger than 4 GB, we'll probably just crash. */
|
||||||
|
assert(token->offset < SOURCE_OFFSET_INVALID);
|
||||||
|
result.source_start = (source_offset_t)token->offset;
|
||||||
|
|
||||||
|
assert(token->length <= SOURCE_OFFSET_INVALID);
|
||||||
|
result.source_length = (source_offset_t)token->length;
|
||||||
|
|
||||||
tok_next(tok);
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1307,11 +1305,12 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
|
||||||
parse_token_t queue[2] = {kInvalidToken, kInvalidToken};
|
parse_token_t queue[2] = {kInvalidToken, kInvalidToken};
|
||||||
|
|
||||||
/* Loop until we have a terminal token. */
|
/* Loop until we have a terminal token. */
|
||||||
|
tok_t tokenizer_token;
|
||||||
for (size_t token_count = 0; queue[0].type != parse_token_type_terminate; token_count++)
|
for (size_t token_count = 0; queue[0].type != parse_token_type_terminate; token_count++)
|
||||||
{
|
{
|
||||||
/* Push a new token onto the queue */
|
/* Push a new token onto the queue */
|
||||||
queue[0] = queue[1];
|
queue[0] = queue[1];
|
||||||
queue[1] = next_parse_token(&tok);
|
queue[1] = next_parse_token(&tok, &tokenizer_token);
|
||||||
|
|
||||||
/* If we are leaving things unterminated, then don't pass parse_token_type_terminate */
|
/* If we are leaving things unterminated, then don't pass parse_token_type_terminate */
|
||||||
if (queue[0].type == parse_token_type_terminate && (parse_flags & parse_flag_leave_unterminated))
|
if (queue[0].type == parse_token_type_terminate && (parse_flags & parse_flag_leave_unterminated))
|
||||||
|
@ -1328,7 +1327,7 @@ bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t parse_flags,
|
||||||
/* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */
|
/* Handle tokenizer errors. This is a hack because really the parser should report this for itself; but it has no way of getting the tokenizer message */
|
||||||
if (queue[1].type == parse_special_type_tokenizer_error)
|
if (queue[1].type == parse_special_type_tokenizer_error)
|
||||||
{
|
{
|
||||||
parser.report_tokenizer_error(queue[1], tok_get_error(&tok), tok_last(&tok));
|
parser.report_tokenizer_error(queue[1], tokenizer_token.error, tokenizer_token.text);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Handle errors */
|
/* Handle errors */
|
||||||
|
|
|
@ -358,7 +358,6 @@ static void job_or_process_extent(const wchar_t *buff,
|
||||||
int process)
|
int process)
|
||||||
{
|
{
|
||||||
const wchar_t *begin, *end;
|
const wchar_t *begin, *end;
|
||||||
long pos;
|
|
||||||
wchar_t *buffcpy;
|
wchar_t *buffcpy;
|
||||||
int finished=0;
|
int finished=0;
|
||||||
|
|
||||||
|
@ -380,7 +379,8 @@ static void job_or_process_extent(const wchar_t *buff,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = cursor_pos - (begin - buff);
|
assert(cursor_pos >= (begin - buff));
|
||||||
|
const size_t pos = cursor_pos - (begin - buff);
|
||||||
|
|
||||||
if (a)
|
if (a)
|
||||||
{
|
{
|
||||||
|
@ -400,11 +400,12 @@ static void job_or_process_extent(const wchar_t *buff,
|
||||||
}
|
}
|
||||||
|
|
||||||
tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
|
tokenizer_t tok(buffcpy, TOK_ACCEPT_UNFINISHED);
|
||||||
for (; tok_has_next(&tok) && !finished; tok_next(&tok))
|
tok_t token;
|
||||||
|
while (tok.next(&token) && !finished)
|
||||||
{
|
{
|
||||||
int tok_begin = tok_get_pos(&tok);
|
size_t tok_begin = token.offset;
|
||||||
|
|
||||||
switch (tok_last_type(&tok))
|
switch (token.type)
|
||||||
{
|
{
|
||||||
case TOK_PIPE:
|
case TOK_PIPE:
|
||||||
{
|
{
|
||||||
|
@ -501,17 +502,18 @@ void parse_util_token_extent(const wchar_t *buff,
|
||||||
const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end-cmdsubst_begin);
|
const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end-cmdsubst_begin);
|
||||||
|
|
||||||
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||||
for (; tok_has_next(&tok); tok_next(&tok))
|
tok_t token;
|
||||||
|
while (tok.next(&token))
|
||||||
{
|
{
|
||||||
size_t tok_begin = tok_get_pos(&tok);
|
size_t tok_begin = token.offset;
|
||||||
size_t tok_end = tok_begin;
|
size_t tok_end = tok_begin;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Calculate end of token
|
Calculate end of token
|
||||||
*/
|
*/
|
||||||
if (tok_last_type(&tok) == TOK_STRING)
|
if (token.type == TOK_STRING)
|
||||||
{
|
{
|
||||||
tok_end += wcslen(tok_last(&tok));
|
tok_end += token.text.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -529,20 +531,20 @@ void parse_util_token_extent(const wchar_t *buff,
|
||||||
If cursor is inside the token, this is the token we are
|
If cursor is inside the token, this is the token we are
|
||||||
looking for. If so, set a and b and break
|
looking for. If so, set a and b and break
|
||||||
*/
|
*/
|
||||||
if ((tok_last_type(&tok) == TOK_STRING) && (tok_end >= offset_within_cmdsubst))
|
if (token.type == TOK_STRING && tok_end >= offset_within_cmdsubst)
|
||||||
{
|
{
|
||||||
a = cmdsubst_begin + tok_get_pos(&tok);
|
a = cmdsubst_begin + token.offset;
|
||||||
b = a + wcslen(tok_last(&tok));
|
b = a + token.text.size();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Remember previous string token
|
Remember previous string token
|
||||||
*/
|
*/
|
||||||
if (tok_last_type(&tok) == TOK_STRING)
|
if (token.type == TOK_STRING)
|
||||||
{
|
{
|
||||||
pa = cmdsubst_begin + tok_get_pos(&tok);
|
pa = cmdsubst_begin + token.offset;
|
||||||
pb = pa + wcslen(tok_last(&tok));
|
pb = pa + token.text.size();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -684,10 +686,11 @@ wchar_t *parse_util_unescape_wildcards(const wchar_t *str)
|
||||||
token is not quoted.
|
token is not quoted.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
static wchar_t get_quote(const wchar_t *cmd, size_t len)
|
static wchar_t get_quote(const wcstring &cmd_str, size_t len)
|
||||||
{
|
{
|
||||||
size_t i=0;
|
size_t i=0;
|
||||||
wchar_t res=0;
|
wchar_t res=0;
|
||||||
|
const wchar_t * const cmd = cmd_str.c_str();
|
||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
|
@ -722,26 +725,26 @@ static wchar_t get_quote(const wchar_t *cmd, size_t len)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_t *quote, size_t *offset, int *type)
|
void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_t *quote, size_t *offset, enum token_type *out_type)
|
||||||
{
|
{
|
||||||
size_t prev_pos=0;
|
size_t prev_pos=0;
|
||||||
wchar_t last_quote = '\0';
|
wchar_t last_quote = '\0';
|
||||||
int unfinished;
|
int unfinished;
|
||||||
|
|
||||||
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SQUASH_ERRORS);
|
||||||
for (; tok_has_next(&tok); tok_next(&tok))
|
tok_t token;
|
||||||
|
while (tok.next(&token))
|
||||||
{
|
{
|
||||||
if (tok_get_pos(&tok) > pos)
|
if (token.offset > pos)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (tok_last_type(&tok) == TOK_STRING)
|
if (token.type == TOK_STRING)
|
||||||
last_quote = get_quote(tok_last(&tok),
|
last_quote = get_quote(token.text, pos - token.offset);
|
||||||
pos - tok_get_pos(&tok));
|
|
||||||
|
|
||||||
if (type != NULL)
|
if (out_type != NULL)
|
||||||
*type = tok_last_type(&tok);
|
*out_type = token.type;
|
||||||
|
|
||||||
prev_pos = tok_get_pos(&tok);
|
prev_pos = token.offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
wchar_t *cmd_tmp = wcsdup(cmd.c_str());
|
wchar_t *cmd_tmp = wcsdup(cmd.c_str());
|
||||||
|
|
|
@ -166,9 +166,9 @@ bool parse_util_argument_is_help(const wchar_t *s, int min_match);
|
||||||
\param pos An index in the string which is inside the parameter
|
\param pos An index in the string which is inside the parameter
|
||||||
\param quote If not NULL, store the type of quote this parameter has, can be either ', " or \\0, meaning the string is not quoted.
|
\param quote If not NULL, store the type of quote this parameter has, can be either ', " or \\0, meaning the string is not quoted.
|
||||||
\param offset If not NULL, get_param will store the offset to the beginning of the parameter.
|
\param offset If not NULL, get_param will store the offset to the beginning of the parameter.
|
||||||
\param type If not NULL, get_param will store the token type as returned by tok_last.
|
\param type If not NULL, get_param will store the token type.
|
||||||
*/
|
*/
|
||||||
void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_t *quote, size_t *offset, int *type);
|
void parse_util_get_parameter_info(const wcstring &cmd, const size_t pos, wchar_t *quote, size_t *offset, enum token_type *out_type);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Attempts to escape the string 'cmd' using the given quote type, as determined by the quote character. The quote can be a single quote or double quote, or L'\0' to indicate no quoting (and thus escaping should be with backslashes).
|
Attempts to escape the string 'cmd' using the given quote type, as determined by the quote character. The quote can be a single quote or double quote, or L'\0' to indicate no quoting (and thus escaping should be with backslashes).
|
||||||
|
|
|
@ -3087,14 +3087,13 @@ static wchar_t unescaped_quote(const wcstring &str, size_t pos)
|
||||||
/* Returns true if the last token is a comment. */
|
/* Returns true if the last token is a comment. */
|
||||||
static bool text_ends_in_comment(const wcstring &text)
|
static bool text_ends_in_comment(const wcstring &text)
|
||||||
{
|
{
|
||||||
token_type last_type = TOK_NONE;
|
|
||||||
tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
|
tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
|
||||||
while (tok_has_next(&tok))
|
tok_t token;
|
||||||
|
while (tok.next(&token))
|
||||||
{
|
{
|
||||||
last_type = tok_last_type(&tok);
|
// pass
|
||||||
tok_next(&tok);
|
|
||||||
}
|
}
|
||||||
return last_type == TOK_COMMENT;
|
return token.type == TOK_COMMENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
const wchar_t *reader_readline(int nchars)
|
const wchar_t *reader_readline(int nchars)
|
||||||
|
|
|
@ -52,6 +52,10 @@ segments.
|
||||||
*/
|
*/
|
||||||
#define PIPE_ERROR _( L"Cannot use stdin (fd 0) as pipe output" )
|
#define PIPE_ERROR _( L"Cannot use stdin (fd 0) as pipe output" )
|
||||||
|
|
||||||
|
static void tok_next(tokenizer_t *tok);
|
||||||
|
static enum token_type tok_last_type(tokenizer_t *tok);
|
||||||
|
static const wchar_t *tok_last(tokenizer_t *tok);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Set the latest tokens string to be the specified error message
|
Set the latest tokens string to be the specified error message
|
||||||
*/
|
*/
|
||||||
|
@ -62,11 +66,6 @@ static void tok_call_error(tokenizer_t *tok, enum tokenizer_error error_type, co
|
||||||
tok->last_token = error_message;
|
tok->last_token = error_message;
|
||||||
}
|
}
|
||||||
|
|
||||||
int tok_get_error(tokenizer_t *tok)
|
|
||||||
{
|
|
||||||
return tok->error;
|
|
||||||
}
|
|
||||||
|
|
||||||
tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), squash_errors(false), continue_line_after_comment(false)
|
tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), squash_errors(false), continue_line_after_comment(false)
|
||||||
{
|
{
|
||||||
CHECK(b,);
|
CHECK(b,);
|
||||||
|
@ -91,13 +90,18 @@ bool tokenizer_t::next(struct tok_t *result)
|
||||||
result->text = this->last_token;
|
result->text = this->last_token;
|
||||||
result->type = this->last_type;
|
result->type = this->last_type;
|
||||||
result->offset = last_pos;
|
result->offset = last_pos;
|
||||||
|
result->error = this->last_type == TOK_ERROR ? this->error : TOK_ERROR_NONE;
|
||||||
assert(this->buff >= this->orig_buff);
|
assert(this->buff >= this->orig_buff);
|
||||||
result->length = this->buff - this->orig_buff;
|
|
||||||
|
assert(this->buff >= this->orig_buff);
|
||||||
|
size_t current_pos = this->buff - this->orig_buff;
|
||||||
|
result->length = current_pos >= this->last_pos ? current_pos - this->last_pos : 0;
|
||||||
|
|
||||||
tok_next(this);
|
tok_next(this);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
enum token_type tok_last_type(tokenizer_t *tok)
|
static enum token_type tok_last_type(tokenizer_t *tok)
|
||||||
{
|
{
|
||||||
CHECK(tok, TOK_ERROR);
|
CHECK(tok, TOK_ERROR);
|
||||||
CHECK(tok->buff, TOK_ERROR);
|
CHECK(tok->buff, TOK_ERROR);
|
||||||
|
@ -105,25 +109,13 @@ enum token_type tok_last_type(tokenizer_t *tok)
|
||||||
return tok->last_type;
|
return tok->last_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
const wchar_t *tok_last(tokenizer_t *tok)
|
static const wchar_t *tok_last(tokenizer_t *tok)
|
||||||
{
|
{
|
||||||
CHECK(tok, 0);
|
CHECK(tok, 0);
|
||||||
|
|
||||||
return tok->last_token.c_str();
|
return tok->last_token.c_str();
|
||||||
}
|
}
|
||||||
|
|
||||||
int tok_has_next(tokenizer_t *tok)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
Return 1 on broken tokenizer
|
|
||||||
*/
|
|
||||||
CHECK(tok, 1);
|
|
||||||
CHECK(tok->buff, 1);
|
|
||||||
|
|
||||||
/* fwprintf( stderr, L"has_next is %ls \n", tok->has_next?L"true":L"false" );*/
|
|
||||||
return tok->has_next;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Tests if this character can be a part of a string. The redirect ^ is allowed unless it's the first character.
|
Tests if this character can be a part of a string. The redirect ^ is allowed unless it's the first character.
|
||||||
Hash (#) starts a comment if it's the first character in a token; otherwise it is considered a string character.
|
Hash (#) starts a comment if it's the first character in a token; otherwise it is considered a string character.
|
||||||
|
@ -539,7 +531,7 @@ static bool my_iswspace(wchar_t c)
|
||||||
return c != L'\n' && iswspace(c);
|
return c != L'\n' && iswspace(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
void tok_next(tokenizer_t *tok)
|
static void tok_next(tokenizer_t *tok)
|
||||||
{
|
{
|
||||||
|
|
||||||
CHECK(tok,);
|
CHECK(tok,);
|
||||||
|
@ -718,20 +710,6 @@ wcstring tok_first(const wchar_t *str)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
int tok_get_pos(const tokenizer_t *tok)
|
|
||||||
{
|
|
||||||
CHECK(tok, 0);
|
|
||||||
return (int)tok->last_pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t tok_get_extent(const tokenizer_t *tok)
|
|
||||||
{
|
|
||||||
CHECK(tok, 0);
|
|
||||||
size_t current_pos = tok->buff - tok->orig_buff;
|
|
||||||
return current_pos > tok->last_pos ? current_pos - tok->last_pos : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool move_word_state_machine_t::consume_char_punctuation(wchar_t c)
|
bool move_word_state_machine_t::consume_char_punctuation(wchar_t c)
|
||||||
{
|
{
|
||||||
enum
|
enum
|
||||||
|
|
|
@ -76,16 +76,16 @@ struct tok_t
|
||||||
/* The type of the token */
|
/* The type of the token */
|
||||||
token_type type;
|
token_type type;
|
||||||
|
|
||||||
|
/* If an error, this is the error code */
|
||||||
|
enum tokenizer_error error;
|
||||||
|
|
||||||
/* Offset of the token */
|
/* Offset of the token */
|
||||||
size_t offset;
|
size_t offset;
|
||||||
|
|
||||||
/* Length of the token */
|
/* Length of the token */
|
||||||
size_t length;
|
size_t length;
|
||||||
|
|
||||||
/* If an error, this is the error code */
|
tok_t() : type(TOK_NONE), error(TOK_ERROR_NONE), offset(-1), length(-1) {}
|
||||||
enum tokenizer_error error;
|
|
||||||
|
|
||||||
tok_t() : type(TOK_NONE), offset(-1), length(-1), error(TOK_ERROR_NONE) {}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -138,33 +138,6 @@ struct tokenizer_t
|
||||||
bool next(struct tok_t *result);
|
bool next(struct tok_t *result);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
Jump to the next token.
|
|
||||||
*/
|
|
||||||
void tok_next(tokenizer_t *tok);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the type of the last token. Must be one of the values in the token_type enum.
|
|
||||||
*/
|
|
||||||
enum token_type tok_last_type(tokenizer_t *tok);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the last token string. The string should not be freed by the caller. This returns nonsense results for some token types, like TOK_END.
|
|
||||||
*/
|
|
||||||
const wchar_t *tok_last(tokenizer_t *tok);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns true as long as there are more tokens left
|
|
||||||
*/
|
|
||||||
int tok_has_next(tokenizer_t *tok);
|
|
||||||
|
|
||||||
/**
|
|
||||||
Returns the position of the beginning of the current token in the original string
|
|
||||||
*/
|
|
||||||
int tok_get_pos(const tokenizer_t *tok);
|
|
||||||
|
|
||||||
/** Returns the extent of the current token */
|
|
||||||
size_t tok_get_extent(const tokenizer_t *tok);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Returns only the first token from the specified string. This is a
|
Returns only the first token from the specified string. This is a
|
||||||
|
@ -175,11 +148,6 @@ size_t tok_get_extent(const tokenizer_t *tok);
|
||||||
*/
|
*/
|
||||||
wcstring tok_first(const wchar_t *str);
|
wcstring tok_first(const wchar_t *str);
|
||||||
|
|
||||||
/**
|
|
||||||
Get tokenizer error type. Should only be called if tok_last_tope returns TOK_ERROR.
|
|
||||||
*/
|
|
||||||
int tok_get_error(tokenizer_t *tok);
|
|
||||||
|
|
||||||
/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid. Also returns the fd by reference. */
|
/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid. Also returns the fd by reference. */
|
||||||
enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL);
|
enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue