2012-11-18 10:23:22 +00:00
/** \file tokenizer.h
2005-09-20 13:26:39 +00:00
A specialized tokenizer for tokenizing the fish language . In the
future , the tokenizer should be extended to support marks ,
tokenizing multiple strings and disposing of unused string
segments .
*/
2005-10-04 15:11:39 +00:00
# ifndef FISH_TOKENIZER_H
# define FISH_TOKENIZER_H
# include <wchar.h>
2005-09-20 13:26:39 +00:00
/**
   Token types. tok_last_type() is documented as returning one of
   these values for the most recently read token.
*/
enum token_type
{
2012-11-19 00:30:30 +00:00
TOK_NONE , /**< Tokenizer not yet constructed */
TOK_ERROR , /**< Error reading token; tok_get_error() is documented as valid only in this case */
TOK_INVALID , /**< Invalid token */
TOK_STRING , /**< String token */
TOK_PIPE , /**< Pipe token */
TOK_END , /**< End token */
TOK_REDIRECT_OUT , /**< Redirection token */
TOK_REDIRECT_APPEND , /**< Redirection append token */
TOK_REDIRECT_IN , /**< Input redirection token */
TOK_REDIRECT_FD , /**< Redirection to new fd token */
TOK_REDIRECT_NOCLOB , /**< '?' redirection token. NOTE(review): the name suggests a no-clobber redirection (do not overwrite an existing file) - confirm against the implementation */
TOK_BACKGROUND , /**< Send job to background token */
TOK_COMMENT /**< Comment token; comments are returned as tokens when TOK_SHOW_COMMENTS is set */
2012-02-15 19:33:41 +00:00
} ;
2006-10-07 00:56:25 +00:00
/**
   Tokenizer error types. Presumably the values reported by
   tok_get_error(), which is documented as meaningful only when the
   last token type is TOK_ERROR.

   NOTE(review): the per-value descriptions below are inferred from
   the constant names only - confirm against the implementation.
*/
enum tokenizer_error
{
2012-11-19 00:30:30 +00:00
TOK_UNTERMINATED_QUOTE , /**< Presumably: a quoted string was never closed */
TOK_UNTERMINATED_SUBSHELL , /**< Presumably: a subshell is missing its closing parenthesis */
TOK_UNTERMINATED_ESCAPE , /**< Presumably: the input ended in the middle of an escape sequence */
TOK_OTHER /**< Any error not covered by the values above */
2006-10-07 00:56:25 +00:00
}
2012-11-19 00:30:30 +00:00
;
2006-10-07 00:56:25 +00:00
2005-09-20 13:26:39 +00:00
/**
   Flag (for the flags argument of tok_init()) telling the tokenizer
   to accept incomplete parameters, i.e. parameters with mismatched
   parentheses, etc. This is useful for tab-completion.
*/
# define TOK_ACCEPT_UNFINISHED 1
/**
   Flag (for the flags argument of tok_init()) telling the tokenizer
   not to remove comments, so that they are returned as tokens.
   Useful for syntax highlighting.
*/
# define TOK_SHOW_COMMENTS 2
2012-02-17 23:55:54 +00:00
/**
   Flag telling the tokenizer to not generate error messages, which
   we need to do when tokenizing off of the main thread (since
   wgettext is not thread safe).
*/
# define TOK_SQUASH_ERRORS 4
2005-09-20 13:26:39 +00:00
/**
2012-11-18 10:23:22 +00:00
   The tokenizer struct. Holds the state of one tokenization pass
   over a string: the input, the current position, and information
   about the most recently read token. It is set up with tok_init()
   and operated on through the tok_* functions declared in this
   header.
2005-09-20 13:26:39 +00:00
*/
2012-01-23 04:47:13 +00:00
struct tokenizer
2005-09-20 13:26:39 +00:00
{
2012-11-19 00:30:30 +00:00
/** A pointer into the original string, showing where the next token begins */
const wchar_t * buff ;
/**
   The original string.
   NOTE(review): this was described as "a copy of the original
   string", but tok_init() documents that the input is not copied -
   presumably this points at the caller's buffer. Confirm.
*/
const wchar_t * orig_buff ;
/** A pointer to the last token */
wchar_t * last ;
/** Type of last token; presumably a token_type value, stored as int */
int last_type ;
/** Length of last token */
size_t last_len ;
/** Offset of last token */
size_t last_pos ;
/** Whether there are more tokens */
bool has_next ;
/** Whether incomplete tokens are accepted */
bool accept_unfinished ;
/** Whether comments should be returned */
bool show_comments ;
/** Type of last quote, can be either ' or ". */
wchar_t last_quote ;
/** Last error; presumably a tokenizer_error value, stored as int */
int error ;
2012-02-17 23:55:54 +00:00
/** Whether we are squashing errors, i.e. not generating error messages */
bool squash_errors ;
2012-08-05 00:44:14 +00:00
/**
   Cached line number information: an offset and a line count.
   NOTE(review): presumably maintained by
   line_number_of_character_at_offset() to avoid recounting lines
   from the start of the string - confirm against the implementation.
*/
size_t cached_lineno_offset ;
int cached_lineno_count ;
/**
   Return the line number of the character at the given offset.

   \param offset Offset of the character whose line number is wanted
   \return The line number of that character
*/
int line_number_of_character_at_offset ( size_t offset ) ;
2012-01-23 04:47:13 +00:00
} ;
2005-09-20 13:26:39 +00:00
/**
   Initialize the tokenizer. b is the string that is to be
   tokenized. It is not copied, and should not be freed by the caller
   until after the tokenizer is destroyed.

   \param tok The tokenizer to initialize
   \param b The string to tokenize
   \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
   to accept incomplete tokens, such as a subshell without a closing
   parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens.
2012-11-18 10:23:22 +00:00
2005-09-20 13:26:39 +00:00
*/
2012-11-19 00:30:30 +00:00
void tok_init ( tokenizer * tok , const wchar_t * b , int flags ) ;
2005-09-20 13:26:39 +00:00
/**
   Jump to the next token.

   \param tok The tokenizer to advance
*/
2012-11-19 00:30:30 +00:00
void tok_next ( tokenizer * tok ) ;
2005-09-20 13:26:39 +00:00
/**
   Returns the type of the last token. Must be one of the values in
   the token_type enum.

   \param tok The tokenizer to query
   \return The type of the last token, as an int holding a token_type value
*/
2012-11-19 00:30:30 +00:00
int tok_last_type ( tokenizer * tok ) ;
2005-09-20 13:26:39 +00:00
/**
   Returns the last token string. The string should not be freed by
   the caller.

   \param tok The tokenizer to query
   \return The last token string
*/
2012-11-19 00:30:30 +00:00
wchar_t * tok_last ( tokenizer * tok ) ;
2005-09-20 13:26:39 +00:00
/**
   Returns the type of quote from the last TOK_QSTRING.

   NOTE(review): the token_type enum in this header has no
   TOK_QSTRING value; this presumably means the last quoted string
   that was read. The tokenizer struct documents the last quote type
   as being either ' or ". Confirm against the implementation.

   \param tok The tokenizer to query
   \return The quote character
*/
2012-11-19 00:30:30 +00:00
wchar_t tok_last_quote ( tokenizer * tok ) ;
2005-09-20 13:26:39 +00:00
/**
   Returns true as long as there are more tokens left.

   \param tok The tokenizer to query
   \return A truth value expressed as an int
*/
2012-11-19 00:30:30 +00:00
int tok_has_next ( tokenizer * tok ) ;
2005-09-20 13:26:39 +00:00
/**
   Returns the position of the beginning of the current token in the
   original string.

   NOTE(review): the tokenizer struct stores the token offset as a
   size_t (last_pos) while this function returns int; if it reports
   that field, the value is narrowed where size_t is wider than
   int - confirm against the implementation.

   \param tok The tokenizer to query
   \return The position of the current token
*/
2012-11-19 00:30:30 +00:00
int tok_get_pos ( tokenizer * tok ) ;
2005-09-20 13:26:39 +00:00
/**
   Destroy the tokenizer and free associated memory. Per the
   tok_init() documentation, the tokenized string itself must not be
   freed by the caller until after this has been done.

   \param tok The tokenizer to destroy
*/
2012-11-19 00:30:30 +00:00
void tok_destroy ( tokenizer * tok ) ;
2005-09-20 13:26:39 +00:00
/**
   Returns the original string given to the tokenizer.

   \param tok The tokenizer to query
   \return The original string (presumably the one passed to tok_init() - confirm)
*/
2012-11-19 00:30:30 +00:00
const wchar_t * tok_string ( tokenizer * tok ) ;
2005-09-20 13:26:39 +00:00
/**
   Returns only the first token from the specified string. This is a
   convenience function, used to retrieve the first token of a
   string. This can be useful for error messages, etc.

   The returned string should be freed by the caller after use.
   NOTE(review): the matching deallocation function is not visible
   in this header - confirm how the string is allocated.

   \param str The string to take the first token from
   \return The first token of str
*/
2012-11-19 00:30:30 +00:00
wchar_t * tok_first ( const wchar_t * str ) ;
2005-09-20 13:26:39 +00:00
/**
   Move tokenizer position.

   \param tok The tokenizer to reposition
   \param pos The new position (presumably an offset into the original string, matching tok_get_pos() - confirm)
*/
2012-11-19 00:30:30 +00:00
void tok_set_pos ( tokenizer * tok , int pos ) ;
2005-09-20 13:26:39 +00:00
/**
   Returns a string description of the specified token type.

   \param type The token type to describe
   \return A description of the token type
*/
2012-11-19 00:30:30 +00:00
const wchar_t * tok_get_desc ( int type ) ;
2005-09-20 13:26:39 +00:00
2006-10-07 00:56:25 +00:00
/**
   Get tokenizer error type. Should only be called if tok_last_type()
   returns TOK_ERROR.

   \param tok The tokenizer to query
   \return The error type (presumably a tokenizer_error value, returned as int - confirm)
*/
2012-11-19 00:30:30 +00:00
int tok_get_error ( tokenizer * tok ) ;
2006-10-07 00:56:25 +00:00
2005-10-04 15:11:39 +00:00
# endif