From 365a6ee384bcb2e9ec9b28262d65d99d785a544e Mon Sep 17 00:00:00 2001 From: Aaron Gyes Date: Tue, 21 Dec 2021 02:26:41 -0800 Subject: [PATCH] Use source_offset_t (uint32) in tokenizer. Seems like size_t is unnecessarily large as well, as elsewhere in the code we are clamping down to uint32_t / source_offset_t. This makes tok_t more like 16 bytes. More cleanup seems desirable, this is not very well harmonized across our code base. --- src/complete.cpp | 5 +---- src/parse_constants.h | 9 ++++++--- src/parse_tree.h | 4 ---- src/tokenizer.h | 18 +++++++++--------- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/src/complete.cpp b/src/complete.cpp index 9bda3c61e..559791c7f 100644 --- a/src/complete.cpp +++ b/src/complete.cpp @@ -1674,10 +1674,7 @@ void completer_t::perform_for_commandline(wcstring cmdline) { custom_arg_data_t arg_data{&var_assignments}; arg_data.had_ddash = had_ddash; - assert(cmd_tok.offset < std::numeric_limits<uint32_t>::max()); - assert(cmd_tok.length < std::numeric_limits<uint32_t>::max()); - source_range_t command_range = {static_cast<uint32_t>(cmd_tok.offset), - static_cast<uint32_t>(cmd_tok.length)}; + source_range_t command_range = {cmd_tok.offset, cmd_tok.length}; wcstring exp_command = cmd_tok.get_source(cmdline); bool unescaped = diff --git a/src/parse_constants.h b/src/parse_constants.h index 904e75ad5..a03e416fc 100644 --- a/src/parse_constants.h +++ b/src/parse_constants.h @@ -7,6 +7,9 @@ #include "common.h" #include "enum_map.h" +using source_offset_t = uint32_t; +constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1); + #define PARSER_DIE() \ do { \ FLOG(error, L"Parser dying!"); \ @@ -15,10 +18,10 @@ // A range of source code. 
struct source_range_t { - uint32_t start; - uint32_t length; + source_offset_t start; + source_offset_t length; - uint32_t end() const { + source_offset_t end() const { assert(start + length >= start && "Overflow"); return start + length; } diff --git a/src/parse_tree.h b/src/parse_tree.h index dae6c8cd1..7f9d9e1e2 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -16,10 +16,6 @@ #include "parse_constants.h" #include "tokenizer.h" -typedef uint32_t source_offset_t; - -constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1); - /// A struct representing the token type that we use internally. struct parse_token_t { enum parse_token_type_t type; // The type of the token as represented by the parser diff --git a/src/tokenizer.h b/src/tokenizer.h index 8a860cc13..f0504db02 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -10,8 +10,8 @@ #include "parse_constants.h" #include "redirection.h" -/// Token types. -enum class token_type_t { +/// Token types. XXX Why this isn't parse_token_type_t, I'm not really sure. +enum class token_type_t : uint8_t { error, /// Error reading token string, /// String token pipe, /// Pipe token @@ -39,7 +39,7 @@ enum class token_type_t { using tok_flags_t = unsigned int; -enum class tokenizer_error_t { +enum class tokenizer_error_t : uint8_t { none, unterminated_quote, unterminated_subshell, @@ -61,20 +61,20 @@ const wchar_t *tokenizer_get_error_message(tokenizer_error_t err); struct tok_t { // Offset of the token. - size_t offset{0}; + source_offset_t offset{0}; // Length of the token. - size_t length{0}; + source_offset_t length{0}; // If an error, this is the offset of the error within the token. A value of 0 means it occurred // at 'offset'. - size_t error_offset_within_token{size_t(-1)}; - - // The type of the token. - token_type_t type; + source_offset_t error_offset_within_token{SOURCE_OFFSET_INVALID}; // If an error, this is the error code. 
tokenizer_error_t error{tokenizer_error_t::none}; + // The type of the token. + token_type_t type; + // Construct from a token type. explicit tok_t(token_type_t type);