restyle tokenizer module to match project style

Reduces lint errors from 70 to 46 (-34%). Line count from 1158 to 936 (-19%).

Another step in resolving issue #2902.
Kurtis Rader 2016-05-03 14:35:12 -07:00
parent 0aa7fd95b8
commit c14bac4284
2 changed files with 445 additions and 667 deletions

File diff suppressed because it is too large.

tokenizer.h

@@ -1,191 +1,151 @@
-/** \file tokenizer.h
-    A specialized tokenizer for tokenizing the fish language. In the
-    future, the tokenizer should be extended to support marks,
-    tokenizing multiple strings and disposing of unused string
-    segments.
-*/
+// A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be
+// extended to support marks, tokenizing multiple strings and disposing of unused string segments.
 #ifndef FISH_TOKENIZER_H
 #define FISH_TOKENIZER_H
-#include <stddef.h>
 #include <stdbool.h>
+#include <stddef.h>
 #include "common.h"
-/**
-    Token types
-*/
-enum token_type
-{
-    TOK_NONE, /**< Tokenizer not yet constructed */
-    TOK_ERROR, /**< Error reading token */
-    TOK_STRING,/**< String token */
-    TOK_PIPE,/**< Pipe token */
-    TOK_END,/**< End token (semicolon or newline, not literal end) */
-    TOK_REDIRECT_OUT, /**< redirection token */
-    TOK_REDIRECT_APPEND,/**< redirection append token */
-    TOK_REDIRECT_IN,/**< input redirection token */
-    TOK_REDIRECT_FD,/**< redirection to new fd token */
-    TOK_REDIRECT_NOCLOB, /**<? redirection token */
-    TOK_BACKGROUND,/**< send job to bg token */
-    TOK_COMMENT/**< comment token */
+/// Token types.
+enum token_type {
+    TOK_NONE,  /// Tokenizer not yet constructed
+    TOK_ERROR,  /// Error reading token
+    TOK_STRING,  /// String token
+    TOK_PIPE,  /// Pipe token
+    TOK_END,  /// End token (semicolon or newline, not literal end)
+    TOK_REDIRECT_OUT,  /// redirection token
+    TOK_REDIRECT_APPEND,  /// redirection append token
+    TOK_REDIRECT_IN,  /// input redirection token
+    TOK_REDIRECT_FD,  /// redirection to new fd token
+    TOK_REDIRECT_NOCLOB,  /// redirection token
+    TOK_BACKGROUND,  /// send job to bg token
+    TOK_COMMENT  /// comment token
 };
-/**
-    Tokenizer error types
-*/
-enum tokenizer_error
-{
+/// Tokenizer error types.
+enum tokenizer_error {
     TOK_ERROR_NONE,
     TOK_UNTERMINATED_QUOTE,
     TOK_UNTERMINATED_SUBSHELL,
     TOK_UNTERMINATED_SLICE,
     TOK_UNTERMINATED_ESCAPE,
     TOK_OTHER
-}
-;
+};
-/**
-    Flag telling the tokenizer to accept incomplete parameters,
-    i.e. parameters with mismatching paranthesis, etc. This is useful
-    for tab-completion.
-*/
+/// Flag telling the tokenizer to accept incomplete parameters, i.e. parameters with mismatching
+/// paranthesis, etc. This is useful for tab-completion.
 #define TOK_ACCEPT_UNFINISHED 1
-/**
-    Flag telling the tokenizer not to remove comments. Useful for
-    syntax highlighting.
-*/
+/// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting.
 #define TOK_SHOW_COMMENTS 2
-/** Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing off of the main thread (since wgettext is not thread safe).
-*/
+/// Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing
+/// off of the main thread (since wgettext is not thread safe).
 #define TOK_SQUASH_ERRORS 4
-/** Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon.
-    This flag tells the tokenizer to return each of them as a separate END. */
+/// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells
+/// the tokenizer to return each of them as a separate END.
 #define TOK_SHOW_BLANK_LINES 8
 typedef unsigned int tok_flags_t;
-struct tok_t
-{
-    /* The text of the token, or an error message for type error */
+struct tok_t {
+    // The text of the token, or an error message for type error.
     wcstring text;
-    /* The type of the token */
+    // The type of the token.
     token_type type;
-    /* If an error, this is the error code */
+    // If an error, this is the error code.
     enum tokenizer_error error;
-    /* If an error, this is the offset of the error within the token. A value of 0 means it occurred at 'offset' */
+    // If an error, this is the offset of the error within the token. A value of 0 means it occurred
+    // at 'offset'.
     size_t error_offset;
-    /* Offset of the token */
+    // Offset of the token.
     size_t offset;
-    /* Length of the token */
+    // Length of the token.
     size_t length;
     tok_t() : type(TOK_NONE), error(TOK_ERROR_NONE), error_offset(-1), offset(-1), length(-1) {}
 };
-/**
-    The tokenizer struct.
-*/
-class tokenizer_t
-{
-    /* No copying, etc. */
-    tokenizer_t(const tokenizer_t&);
-    void operator=(const tokenizer_t&);
-    /** A pointer into the original string, showing where the next token begins */
+/// The tokenizer struct.
+class tokenizer_t {
+    // No copying, etc.
+    tokenizer_t(const tokenizer_t &);
+    void operator=(const tokenizer_t &);
+    /// A pointer into the original string, showing where the next token begins.
     const wchar_t *buff;
-    /** A copy of the original string */
+    /// A copy of the original string.
     const wchar_t *orig_buff;
-    /** The last token */
+    /// The last token.
     wcstring last_token;
-    /** Type of last token*/
+    /// Type of last token.
     enum token_type last_type;
-    /** Offset of last token*/
+    /// Offset of last token.
     size_t last_pos;
-    /** Whether there are more tokens*/
+    /// Whether there are more tokens.
     bool has_next;
-    /** Whether incomplete tokens are accepted*/
+    /// Whether incomplete tokens are accepted.
     bool accept_unfinished;
-    /** Whether comments should be returned*/
+    /// Whether comments should be returned.
     bool show_comments;
-    /** Whether all blank lines are returned */
+    /// Whether all blank lines are returned.
     bool show_blank_lines;
-    /** Last error */
+    /// Last error.
     tokenizer_error error;
-    /** Last error offset, in "global" coordinates (relative to orig_buff) */
+    /// Last error offset, in "global" coordinates (relative to orig_buff).
     size_t global_error_offset;
-    /* Whether we are squashing errors */
+    /// Whether we are squashing errors.
     bool squash_errors;
-    /* Whether to continue the previous line after the comment */
+    /// Whether to continue the previous line after the comment.
     bool continue_line_after_comment;
-    void call_error(enum tokenizer_error error_type, const wchar_t *where, const wchar_t *error_message);
+    void call_error(enum tokenizer_error error_type, const wchar_t *where,
+                    const wchar_t *error_message);
     void read_string();
     void read_comment();
     void tok_next();
 public:
-    /**
-       Constructor for a tokenizer. b is the string that is to be
-       tokenized. It is not copied, and should not be freed by the caller
-       until after the tokenizer is destroyed.
-
-       \param b The string to tokenize
-       \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
-       to accept incomplete tokens, such as a subshell without a closing
-       parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
-    */
+    /// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and
+    /// should not be freed by the caller until after the tokenizer is destroyed.
+    ///
+    /// \param b The string to tokenize
+    /// \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
+    /// to accept incomplete tokens, such as a subshell without a closing parenthesis, as a valid
+    /// token. Setting TOK_SHOW_COMMENTS will return comments as tokens
     tokenizer_t(const wchar_t *b, tok_flags_t flags);
-    /** Returns the next token by reference. Returns true if we got one, false if we're at the end. */
+    /// Returns the next token by reference. Returns true if we got one, false if we're at the end.
     bool next(struct tok_t *result);
 };
-/**
-    Returns only the first token from the specified string. This is a
-    convenience function, used to retrieve the first token of a
-    string. This can be useful for error messages, etc.
-    On failure, returns the empty string.
-*/
+/// Returns only the first token from the specified string. This is a convenience function, used to
+/// retrieve the first token of a string. This can be useful for error messages, etc. On failure,
+/// returns the empty string.
 wcstring tok_first(const wcstring &str);
-/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid. Also returns the fd by reference. */
+/// Helper function to determine redirection type from a string, or TOK_NONE if the redirection is
+/// invalid. Also returns the fd by reference.
 enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL);
-/* Helper function to determine which fd is redirected by a pipe */
+/// Helper function to determine which fd is redirected by a pipe.
 int fd_redirected_by_pipe(const wcstring &str);
-/* Helper function to return oflags (as in open(2)) for a redirection type */
+/// Helper function to return oflags (as in open(2)) for a redirection type.
 int oflags_for_redirection_type(enum token_type type);
-enum move_word_style_t
-{
-    move_word_style_punctuation, //stop at punctuation
-    move_word_style_path_components, //stops at path components
-    move_word_style_whitespace // stops at whitespace
+enum move_word_style_t {
+    move_word_style_punctuation,  // stop at punctuation
+    move_word_style_path_components,  // stops at path components
+    move_word_style_whitespace  // stops at whitespace
 };
-/* Our state machine that implements "one word" movement or erasure. */
-class move_word_state_machine_t
-{
-private:
+/// Our state machine that implements "one word" movement or erasure.
+class move_word_state_machine_t {
+   private:
     bool consume_char_punctuation(wchar_t c);
     bool consume_char_path_components(wchar_t c);
     bool is_path_component_character(wchar_t c);
@@ -194,12 +154,10 @@ private:
     int state;
     move_word_style_t style;
 public:
     explicit move_word_state_machine_t(move_word_style_t st);
     bool consume_char(wchar_t c);
     void reset();
 };
 #endif
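
For orientation, here is a minimal sketch of how the tokenizer_t interface declared in the restyled header is typically driven. The helper function, its name, and the choice of TOK_ACCEPT_UNFINISHED are illustrative assumptions for this example, not part of the commit:

#include "tokenizer.h"  // brings in wcstring via common.h

// Hypothetical helper: count the TOK_STRING tokens in one line of input.
static size_t count_string_tokens(const wchar_t *input) {
    // TOK_ACCEPT_UNFINISHED makes incomplete constructs (e.g. an unclosed
    // subshell) come back as tokens instead of tokenizer errors.
    tokenizer_t tok(input, TOK_ACCEPT_UNFINISHED);
    tok_t token;
    size_t count = 0;
    while (tok.next(&token)) {  // returns false once the input is exhausted
        if (token.type == TOK_STRING) count++;
        // token.offset and token.length locate the token inside input;
        // on TOK_ERROR, token.error and token.error_offset describe the problem.
    }
    return count;
}

For callers that only need the first token of a string, the header also declares tok_first() as a convenience.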