Use source_offset_t (uint32) in tokenizer.

size_t seems unnecessarily large here as well, since elsewhere in the
code we already clamp down to uint32_t / source_offset_t.

This makes tok_t more like 16 bytes. More cleanup seems desirable;
this is not very well harmonized across our code base.
Author: Aaron Gyes
Date:   2021-12-21 02:26:41 -08:00
Parent: 8e1173bac9
Commit: 365a6ee384

4 changed files with 16 additions and 20 deletions
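
For a rough sense of the size win described in the message above: the sketch
below is not fish code (the stand-in enums, structs, and initializers are
assumptions), but it shows how moving from size_t offsets and int-sized enums
to uint32_t offsets and uint8_t enums takes a tok_t-shaped struct from 32
bytes down to 16 on a typical 64-bit target.

#include <cstdint>
#include <cstdio>

// Stand-ins for the real token enums: the old ones default to int-sized
// storage, the new ones are declared ": uint8_t" as in the diffs below.
enum class type_old { error, string };
enum class error_old { none };
enum class type_new : uint8_t { error, string };
enum class error_new : uint8_t { none };

// Roughly the old layout: size_t offsets plus int-sized enums.
struct tok_old {
    size_t offset{0};
    size_t length{0};
    size_t error_offset_within_token{static_cast<size_t>(-1)};
    type_old type{type_old::error};
    error_old error{error_old::none};
};

// Roughly the new layout: uint32_t offsets plus uint8_t enums, with the
// one-byte fields grouped together so padding stays small.
struct tok_new {
    uint32_t offset{0};
    uint32_t length{0};
    uint32_t error_offset_within_token{UINT32_MAX};
    error_new error{error_new::none};
    type_new type{type_new::error};
};

int main() {
    // Typically prints "old: 32 bytes, new: 16 bytes" on LP64 platforms.
    std::printf("old: %zu bytes, new: %zu bytes\n", sizeof(tok_old), sizeof(tok_new));
}

Grouping the two one-byte fields together, as the last hunk below does by
moving the type field next to the error field, is what keeps the padding
down to two bytes.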

@@ -1674,10 +1674,7 @@ void completer_t::perform_for_commandline(wcstring cmdline) {
     custom_arg_data_t arg_data{&var_assignments};
     arg_data.had_ddash = had_ddash;
-    assert(cmd_tok.offset < std::numeric_limits<uint32_t>::max());
-    assert(cmd_tok.length < std::numeric_limits<uint32_t>::max());
-    source_range_t command_range = {static_cast<uint32_t>(cmd_tok.offset),
-                                    static_cast<uint32_t>(cmd_tok.length)};
+    source_range_t command_range = {cmd_tok.offset, cmd_tok.length};
     wcstring exp_command = cmd_tok.get_source(cmdline);
     bool unescaped =

@@ -7,6 +7,9 @@
 #include "common.h"
 #include "enum_map.h"
 
+using source_offset_t = uint32_t;
+constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
+
 #define PARSER_DIE()                   \
     do {                               \
         FLOG(error, L"Parser dying!"); \
@@ -15,10 +18,10 @@
 // A range of source code.
 struct source_range_t {
-    uint32_t start;
-    uint32_t length;
+    source_offset_t start;
+    source_offset_t length;
 
-    uint32_t end() const {
+    source_offset_t end() const {
         assert(start + length >= start && "Overflow");
         return start + length;
     }

@@ -16,10 +16,6 @@
 #include "parse_constants.h"
 #include "tokenizer.h"
 
-typedef uint32_t source_offset_t;
-constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
-
 /// A struct representing the token type that we use internally.
 struct parse_token_t {
     enum parse_token_type_t type;  // The type of the token as represented by the parser

@@ -10,8 +10,8 @@
 #include "parse_constants.h"
 #include "redirection.h"
 
-/// Token types.
-enum class token_type_t {
+/// Token types. XXX Why this isn't parse_token_type_t, I'm not really sure.
+enum class token_type_t : uint8_t {
     error,   /// Error reading token
     string,  /// String token
     pipe,    /// Pipe token
@@ -39,7 +39,7 @@ enum class token_type_t {
 using tok_flags_t = unsigned int;
 
-enum class tokenizer_error_t {
+enum class tokenizer_error_t : uint8_t {
     none,
     unterminated_quote,
     unterminated_subshell,
@@ -61,20 +61,20 @@ const wchar_t *tokenizer_get_error_message(tokenizer_error_t err);
 struct tok_t {
     // Offset of the token.
-    size_t offset{0};
+    source_offset_t offset{0};
     // Length of the token.
-    size_t length{0};
+    source_offset_t length{0};
 
     // If an error, this is the offset of the error within the token. A value of 0 means it occurred
     // at 'offset'.
-    size_t error_offset_within_token{size_t(-1)};
-    // The type of the token.
-    token_type_t type;
+    source_offset_t error_offset_within_token{SOURCE_OFFSET_INVALID};
 
     // If an error, this is the error code.
     tokenizer_error_t error{tokenizer_error_t::none};
 
+    // The type of the token.
+    token_type_t type;
+
     // Construct from a token type.
     explicit tok_t(token_type_t type);