mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-14 14:03:58 +00:00
restyle tokenizer module to match project style
Reduces lint errors from 70 to 46 (-34%). Line count from 1158 to 936 (-19%). Another step in resolving issue #2902.
This commit is contained in:
parent
0aa7fd95b8
commit
c14bac4284
2 changed files with 445 additions and 667 deletions
File diff suppressed because it is too large
Load diff
206
src/tokenizer.h
206
src/tokenizer.h
|
@ -1,191 +1,151 @@
|
|||
/** \file tokenizer.h
|
||||
|
||||
A specialized tokenizer for tokenizing the fish language. In the
|
||||
future, the tokenizer should be extended to support marks,
|
||||
tokenizing multiple strings and disposing of unused string
|
||||
segments.
|
||||
*/
|
||||
// A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be
|
||||
// extended to support marks, tokenizing multiple strings and disposing of unused string segments.
|
||||
#ifndef FISH_TOKENIZER_H
|
||||
#define FISH_TOKENIZER_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/**
|
||||
Token types
|
||||
*/
|
||||
enum token_type
|
||||
{
|
||||
TOK_NONE, /**< Tokenizer not yet constructed */
|
||||
TOK_ERROR, /**< Error reading token */
|
||||
TOK_STRING,/**< String token */
|
||||
TOK_PIPE,/**< Pipe token */
|
||||
TOK_END,/**< End token (semicolon or newline, not literal end) */
|
||||
TOK_REDIRECT_OUT, /**< redirection token */
|
||||
TOK_REDIRECT_APPEND,/**< redirection append token */
|
||||
TOK_REDIRECT_IN,/**< input redirection token */
|
||||
TOK_REDIRECT_FD,/**< redirection to new fd token */
|
||||
TOK_REDIRECT_NOCLOB, /**<? redirection token */
|
||||
TOK_BACKGROUND,/**< send job to bg token */
|
||||
TOK_COMMENT/**< comment token */
|
||||
/// Token types.
|
||||
enum token_type {
|
||||
TOK_NONE, /// Tokenizer not yet constructed
|
||||
TOK_ERROR, /// Error reading token
|
||||
TOK_STRING, /// String token
|
||||
TOK_PIPE, /// Pipe token
|
||||
TOK_END, /// End token (semicolon or newline, not literal end)
|
||||
TOK_REDIRECT_OUT, /// redirection token
|
||||
TOK_REDIRECT_APPEND, /// redirection append token
|
||||
TOK_REDIRECT_IN, /// input redirection token
|
||||
TOK_REDIRECT_FD, /// redirection to new fd token
|
||||
TOK_REDIRECT_NOCLOB, /// redirection token
|
||||
TOK_BACKGROUND, /// send job to bg token
|
||||
TOK_COMMENT /// comment token
|
||||
};
|
||||
|
||||
/**
|
||||
Tokenizer error types
|
||||
*/
|
||||
enum tokenizer_error
|
||||
{
|
||||
/// Tokenizer error types.
|
||||
enum tokenizer_error {
|
||||
TOK_ERROR_NONE,
|
||||
TOK_UNTERMINATED_QUOTE,
|
||||
TOK_UNTERMINATED_SUBSHELL,
|
||||
TOK_UNTERMINATED_SLICE,
|
||||
TOK_UNTERMINATED_ESCAPE,
|
||||
TOK_OTHER
|
||||
}
|
||||
;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
Flag telling the tokenizer to accept incomplete parameters,
|
||||
i.e. parameters with mismatching paranthesis, etc. This is useful
|
||||
for tab-completion.
|
||||
*/
|
||||
/// Flag telling the tokenizer to accept incomplete parameters, i.e. parameters with mismatching
|
||||
/// parentheses, etc. This is useful for tab-completion.
|
||||
#define TOK_ACCEPT_UNFINISHED 1
|
||||
|
||||
/**
|
||||
Flag telling the tokenizer not to remove comments. Useful for
|
||||
syntax highlighting.
|
||||
*/
|
||||
/// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting.
|
||||
#define TOK_SHOW_COMMENTS 2
|
||||
|
||||
/** Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing off of the main thread (since wgettext is not thread safe).
|
||||
*/
|
||||
/// Flag telling the tokenizer to not generate error messages, which we need to do when tokenizing
|
||||
/// off of the main thread (since wgettext is not thread safe).
|
||||
#define TOK_SQUASH_ERRORS 4
|
||||
|
||||
/** Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon.
|
||||
This flag tells the tokenizer to return each of them as a separate END. */
|
||||
/// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells
|
||||
/// the tokenizer to return each of them as a separate END.
|
||||
#define TOK_SHOW_BLANK_LINES 8
|
||||
|
||||
typedef unsigned int tok_flags_t;
|
||||
|
||||
struct tok_t
|
||||
{
|
||||
/* The text of the token, or an error message for type error */
|
||||
struct tok_t {
|
||||
// The text of the token, or an error message for type error.
|
||||
wcstring text;
|
||||
|
||||
/* The type of the token */
|
||||
// The type of the token.
|
||||
token_type type;
|
||||
|
||||
/* If an error, this is the error code */
|
||||
// If an error, this is the error code.
|
||||
enum tokenizer_error error;
|
||||
|
||||
/* If an error, this is the offset of the error within the token. A value of 0 means it occurred at 'offset' */
|
||||
// If an error, this is the offset of the error within the token. A value of 0 means it occurred
|
||||
// at 'offset'.
|
||||
size_t error_offset;
|
||||
|
||||
/* Offset of the token */
|
||||
// Offset of the token.
|
||||
size_t offset;
|
||||
|
||||
/* Length of the token */
|
||||
// Length of the token.
|
||||
size_t length;
|
||||
|
||||
|
||||
tok_t() : type(TOK_NONE), error(TOK_ERROR_NONE), error_offset(-1), offset(-1), length(-1) {}
|
||||
};
|
||||
|
||||
/**
|
||||
The tokenizer struct.
|
||||
*/
|
||||
class tokenizer_t
|
||||
{
|
||||
/* No copying, etc. */
|
||||
tokenizer_t(const tokenizer_t&);
|
||||
void operator=(const tokenizer_t&);
|
||||
/// The tokenizer class.
|
||||
class tokenizer_t {
|
||||
// No copying, etc.
|
||||
tokenizer_t(const tokenizer_t &);
|
||||
void operator=(const tokenizer_t &);
|
||||
|
||||
/** A pointer into the original string, showing where the next token begins */
|
||||
/// A pointer into the original string, showing where the next token begins.
|
||||
const wchar_t *buff;
|
||||
/** A copy of the original string */
|
||||
/// The original string being tokenized (a pointer to it; the string itself is not copied).
|
||||
const wchar_t *orig_buff;
|
||||
/** The last token */
|
||||
/// The last token.
|
||||
wcstring last_token;
|
||||
|
||||
/** Type of last token*/
|
||||
/// Type of last token.
|
||||
enum token_type last_type;
|
||||
|
||||
/** Offset of last token*/
|
||||
/// Offset of last token.
|
||||
size_t last_pos;
|
||||
/** Whether there are more tokens*/
|
||||
/// Whether there are more tokens.
|
||||
bool has_next;
|
||||
/** Whether incomplete tokens are accepted*/
|
||||
/// Whether incomplete tokens are accepted.
|
||||
bool accept_unfinished;
|
||||
/** Whether comments should be returned*/
|
||||
/// Whether comments should be returned.
|
||||
bool show_comments;
|
||||
/** Whether all blank lines are returned */
|
||||
/// Whether all blank lines are returned.
|
||||
bool show_blank_lines;
|
||||
/** Last error */
|
||||
/// Last error.
|
||||
tokenizer_error error;
|
||||
/** Last error offset, in "global" coordinates (relative to orig_buff) */
|
||||
/// Last error offset, in "global" coordinates (relative to orig_buff).
|
||||
size_t global_error_offset;
|
||||
/* Whether we are squashing errors */
|
||||
/// Whether we are squashing errors.
|
||||
bool squash_errors;
|
||||
|
||||
/* Whether to continue the previous line after the comment */
|
||||
/// Whether to continue the previous line after the comment.
|
||||
bool continue_line_after_comment;
|
||||
|
||||
void call_error(enum tokenizer_error error_type, const wchar_t *where, const wchar_t *error_message);
|
||||
|
||||
void call_error(enum tokenizer_error error_type, const wchar_t *where,
|
||||
const wchar_t *error_message);
|
||||
void read_string();
|
||||
void read_comment();
|
||||
void tok_next();
|
||||
|
||||
public:
|
||||
/**
|
||||
Constructor for a tokenizer. b is the string that is to be
|
||||
tokenized. It is not copied, and should not be freed by the caller
|
||||
until after the tokenizer is destroyed.
|
||||
|
||||
\param b The string to tokenize
|
||||
\param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
|
||||
to accept incomplete tokens, such as a subshell without a closing
|
||||
parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
|
||||
|
||||
*/
|
||||
public:
|
||||
/// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and
|
||||
/// should not be freed by the caller until after the tokenizer is destroyed.
|
||||
///
|
||||
/// \param b The string to tokenize
|
||||
/// \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
|
||||
/// to accept incomplete tokens, such as a subshell without a closing parenthesis, as a valid
|
||||
/// token. Setting TOK_SHOW_COMMENTS will cause comments to be returned as tokens.
|
||||
tokenizer_t(const wchar_t *b, tok_flags_t flags);
|
||||
|
||||
/** Returns the next token by reference. Returns true if we got one, false if we're at the end. */
|
||||
|
||||
/// Returns the next token by reference. Returns true if we got one, false if we're at the end.
|
||||
bool next(struct tok_t *result);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
Returns only the first token from the specified string. This is a
|
||||
convenience function, used to retrieve the first token of a
|
||||
string. This can be useful for error messages, etc.
|
||||
|
||||
On failure, returns the empty string.
|
||||
*/
|
||||
/// Returns only the first token from the specified string. This is a convenience function, used to
|
||||
/// retrieve the first token of a string. This can be useful for error messages, etc. On failure,
|
||||
/// returns the empty string.
|
||||
wcstring tok_first(const wcstring &str);
|
||||
|
||||
/* Helper function to determine redirection type from a string, or TOK_NONE if the redirection is invalid. Also returns the fd by reference. */
|
||||
/// Helper function to determine redirection type from a string, or TOK_NONE if the redirection is
|
||||
/// invalid. Also returns the fd by reference.
|
||||
enum token_type redirection_type_for_string(const wcstring &str, int *out_fd = NULL);
|
||||
|
||||
/* Helper function to determine which fd is redirected by a pipe */
|
||||
/// Helper function to determine which fd is redirected by a pipe.
|
||||
int fd_redirected_by_pipe(const wcstring &str);
|
||||
|
||||
/* Helper function to return oflags (as in open(2)) for a redirection type */
|
||||
/// Helper function to return oflags (as in open(2)) for a redirection type.
|
||||
int oflags_for_redirection_type(enum token_type type);
|
||||
|
||||
enum move_word_style_t
|
||||
{
|
||||
move_word_style_punctuation, //stop at punctuation
|
||||
move_word_style_path_components, //stops at path components
|
||||
move_word_style_whitespace // stops at whitespace
|
||||
enum move_word_style_t {
|
||||
move_word_style_punctuation, // stop at punctuation
|
||||
move_word_style_path_components, // stops at path components
|
||||
move_word_style_whitespace // stops at whitespace
|
||||
};
|
||||
|
||||
/* Our state machine that implements "one word" movement or erasure. */
|
||||
class move_word_state_machine_t
|
||||
{
|
||||
private:
|
||||
|
||||
/// Our state machine that implements "one word" movement or erasure.
|
||||
class move_word_state_machine_t {
|
||||
private:
|
||||
bool consume_char_punctuation(wchar_t c);
|
||||
bool consume_char_path_components(wchar_t c);
|
||||
bool is_path_component_character(wchar_t c);
|
||||
|
@ -194,12 +154,10 @@ private:
|
|||
int state;
|
||||
move_word_style_t style;
|
||||
|
||||
public:
|
||||
|
||||
public:
|
||||
explicit move_word_state_machine_t(move_word_style_t st);
|
||||
bool consume_char(wchar_t c);
|
||||
void reset();
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue