/** \file tokenizer.h A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be extended to support marks, tokenizing multiple strings and disposing of unused string segments. */ #ifndef FISH_TOKENIZER_H #define FISH_TOKENIZER_H #include #include "common.h" /** Token types */ enum token_type { TOK_NONE, /**< Tokenizer not yet constructed */ TOK_ERROR, /**< Error reading token */ TOK_STRING,/**< String token */ TOK_PIPE,/**< Pipe token */ TOK_END,/**< End token (semicolon or newline, not literal end) */ TOK_REDIRECT_OUT, /**< redirection token */ TOK_REDIRECT_APPEND,/**< redirection append token */ TOK_REDIRECT_IN,/**< input redirection token */ TOK_REDIRECT_FD,/**< redirection to new fd token */ TOK_REDIRECT_NOCLOB, /**, <, etc. is_first should indicate whether this is the first character in a potential string. */ bool tok_is_string_character(wchar_t c, bool is_first); /** Move tokenizer position */ void tok_set_pos(tokenizer_t *tok, int pos); /** Returns a string description of the specified token type */ const wchar_t *tok_get_desc(int type); /** Get tokenizer error type. Should only be called if tok_last_tope returns TOK_ERROR. */ int tok_get_error(tokenizer_t *tok); enum move_word_style_t { move_word_style_punctuation, //stop at punctuation move_word_style_path_components //stops at path components }; /* Our state machine that implements "one word" movement or erasure. */ class move_word_state_machine_t { private: bool consume_char_punctuation(wchar_t c); bool consume_char_path_components(wchar_t c); bool is_path_component_character(wchar_t c); int state; move_word_style_t style; public: move_word_state_machine_t(move_word_style_t st); bool consume_char(wchar_t c); void reset(); }; #endif