mirror of
https://github.com/fish-shell/fish-shell
synced 2024-11-11 23:47:25 +00:00
Early reworking of tokenizer interface
This commit is contained in:
parent
0dbd83ffaf
commit
618896c043
4 changed files with 72 additions and 20 deletions
|
@ -468,22 +468,34 @@ static void test_tok()
|
|||
const wchar_t *str = L"string <redirection 2>&1 'nested \"quoted\" '(string containing subshells ){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect Compress_Newlines\n \n\t\n \nInto_Just_One";
|
||||
const int types[] =
|
||||
{
|
||||
TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, TOK_STRING, TOK_END
|
||||
TOK_STRING, TOK_REDIRECT_IN, TOK_STRING, TOK_REDIRECT_FD, TOK_STRING, TOK_STRING, TOK_STRING, TOK_REDIRECT_OUT, TOK_REDIRECT_APPEND, TOK_STRING, TOK_STRING, TOK_END, TOK_STRING
|
||||
};
|
||||
|
||||
say(L"Test correct tokenization");
|
||||
|
||||
tokenizer_t t(str, 0);
|
||||
for (size_t i=0; i < sizeof types / sizeof *types; i++, tok_next(&t))
|
||||
tok_t token;
|
||||
size_t i = 0;
|
||||
while (t.next(&token))
|
||||
{
|
||||
if (types[i] != tok_last_type(&t))
|
||||
if (i > sizeof types / sizeof *types)
|
||||
{
|
||||
err(L"Too many tokens returned from tokenizer");
|
||||
break;
|
||||
}
|
||||
if (types[i] != token.type)
|
||||
{
|
||||
err(L"Tokenization error:");
|
||||
wprintf(L"Token number %d of string \n'%ls'\n, got token '%ls'\n",
|
||||
wprintf(L"Token number %d of string \n'%ls'\n, got token type %ld\n",
|
||||
i+1,
|
||||
str,
|
||||
tok_last(&t));
|
||||
(long)token.type);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
if (i < sizeof types / sizeof *types)
|
||||
{
|
||||
err(L"Too few tokens returned from tokenizer");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -246,7 +246,7 @@ public:
|
|||
/**
|
||||
Saved position used by token history search
|
||||
*/
|
||||
int token_history_pos;
|
||||
size_t token_history_pos;
|
||||
|
||||
/**
|
||||
Saved search string for token history search. Not handled by command_line_changed.
|
||||
|
@ -2256,7 +2256,7 @@ static void handle_token_history(int forward, int reset)
|
|||
return;
|
||||
|
||||
wcstring str;
|
||||
long current_pos;
|
||||
size_t current_pos;
|
||||
|
||||
if (reset)
|
||||
{
|
||||
|
@ -2292,7 +2292,7 @@ static void handle_token_history(int forward, int reset)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (current_pos == -1)
|
||||
if (current_pos == size_t(-1))
|
||||
{
|
||||
data->token_history_buff.clear();
|
||||
|
||||
|
@ -2330,26 +2330,26 @@ static void handle_token_history(int forward, int reset)
|
|||
|
||||
//debug( 3, L"new '%ls'", data->token_history_buff.c_str() );
|
||||
tokenizer_t tok(data->token_history_buff.c_str(), TOK_ACCEPT_UNFINISHED);
|
||||
for (; tok_has_next(&tok); tok_next(&tok))
|
||||
tok_t token;
|
||||
while (tok.next(&token))
|
||||
{
|
||||
switch (tok_last_type(&tok))
|
||||
switch (token.type)
|
||||
{
|
||||
case TOK_STRING:
|
||||
{
|
||||
if (wcsstr(tok_last(&tok), data->search_buff.c_str()))
|
||||
{
|
||||
if (token.text.find(data->search_buff) != wcstring::npos)
|
||||
{
|
||||
//debug( 3, L"Found token at pos %d\n", tok_get_pos( &tok ) );
|
||||
if (tok_get_pos(&tok) >= current_pos)
|
||||
if (token.offset >= current_pos)
|
||||
{
|
||||
break;
|
||||
}
|
||||
//debug( 3, L"ok pos" );
|
||||
|
||||
const wcstring last_tok = tok_last(&tok);
|
||||
if (find(data->search_prev.begin(), data->search_prev.end(), last_tok) == data->search_prev.end())
|
||||
if (find(data->search_prev.begin(), data->search_prev.end(), token.text) == data->search_prev.end())
|
||||
{
|
||||
data->token_history_pos = tok_get_pos(&tok);
|
||||
str = tok_last(&tok);
|
||||
data->token_history_pos = token.offset;
|
||||
str = token.text;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ segments.
|
|||
/**
|
||||
Set the latest token's string to be the specified error message
|
||||
*/
|
||||
static void tok_call_error(tokenizer_t *tok, int error_type, const wchar_t *error_message)
|
||||
static void tok_call_error(tokenizer_t *tok, enum tokenizer_error error_type, const wchar_t *error_message)
|
||||
{
|
||||
tok->last_type = TOK_ERROR;
|
||||
tok->error = error_type;
|
||||
|
@ -67,7 +67,7 @@ int tok_get_error(tokenizer_t *tok)
|
|||
return tok->error;
|
||||
}
|
||||
|
||||
tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(0), squash_errors(false), continue_line_after_comment(false)
|
||||
tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig_buff(NULL), last_type(TOK_NONE), last_pos(0), has_next(false), accept_unfinished(false), show_comments(false), show_blank_lines(false), error(TOK_ERROR_NONE), squash_errors(false), continue_line_after_comment(false)
|
||||
{
|
||||
CHECK(b,);
|
||||
|
||||
|
@ -81,6 +81,22 @@ tokenizer_t::tokenizer_t(const wchar_t *b, tok_flags_t flags) : buff(NULL), orig
|
|||
tok_next(this);
|
||||
}
|
||||
|
||||
bool tokenizer_t::next(struct tok_t *result)
|
||||
{
|
||||
assert(result != NULL);
|
||||
if (! this->has_next)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
result->text = this->last_token;
|
||||
result->type = this->last_type;
|
||||
result->offset = last_pos;
|
||||
assert(this->buff >= this->orig_buff);
|
||||
result->length = this->buff - this->orig_buff;
|
||||
tok_next(this);
|
||||
return true;
|
||||
}
|
||||
|
||||
enum token_type tok_last_type(tokenizer_t *tok)
|
||||
{
|
||||
CHECK(tok, TOK_ERROR);
|
||||
|
|
|
@ -36,6 +36,7 @@ enum token_type
|
|||
*/
|
||||
enum tokenizer_error
|
||||
{
|
||||
TOK_ERROR_NONE,
|
||||
TOK_UNTERMINATED_QUOTE,
|
||||
TOK_UNTERMINATED_SUBSHELL,
|
||||
TOK_UNTERMINATED_ESCAPE,
|
||||
|
@ -67,6 +68,26 @@ enum tokenizer_error
|
|||
|
||||
typedef unsigned int tok_flags_t;
|
||||
|
||||
struct tok_t
|
||||
{
|
||||
/* The text of the token, or an error message for type error */
|
||||
wcstring text;
|
||||
|
||||
/* The type of the token */
|
||||
token_type type;
|
||||
|
||||
/* Offset of the token */
|
||||
size_t offset;
|
||||
|
||||
/* Length of the token */
|
||||
size_t length;
|
||||
|
||||
/* If an error, this is the error code */
|
||||
enum tokenizer_error error;
|
||||
|
||||
tok_t() : type(TOK_NONE), offset(-1), length(-1), error(TOK_ERROR_NONE) {}
|
||||
};
|
||||
|
||||
/**
|
||||
The tokenizer struct.
|
||||
*/
|
||||
|
@ -93,7 +114,7 @@ struct tokenizer_t
|
|||
/** Whether all blank lines are returned */
|
||||
bool show_blank_lines;
|
||||
/** Last error */
|
||||
int error;
|
||||
tokenizer_error error;
|
||||
/* Whether we are squashing errors */
|
||||
bool squash_errors;
|
||||
|
||||
|
@ -112,6 +133,9 @@ struct tokenizer_t
|
|||
|
||||
*/
|
||||
tokenizer_t(const wchar_t *b, tok_flags_t flags);
|
||||
|
||||
/** Returns the next token by reference. Returns true if we got one, false if we're at the end. */
|
||||
bool next(struct tok_t *result);
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue