Use source_offset_t (uint32) in tokenizer.

size_t seems unnecessarily large here as well, since elsewhere in the
code we already clamp down to uint32_t / source_offset_t.

This makes tok_t more like 16 bytes. More cleanup seems desirable;
this is not very well harmonized across our code base.
Author: Aaron Gyes
Date:   2021-12-21 02:26:41 -08:00
Parent: 8e1173bac9
Commit: 365a6ee384

4 changed files with 16 additions and 20 deletions
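
For a rough sense of the size win described in the message above: the sketch
below is not fish code (the stand-in enums, structs, and initializers are
assumptions), but it shows how moving from size_t offsets and int-sized enums
to uint32_t offsets and uint8_t enums takes a tok_t-shaped struct from 32
bytes down to 16 on a typical 64-bit target.

#include <cstdint>
#include <cstdio>

// Stand-ins for the real token enums: the old ones default to int-sized
// storage, the new ones are declared ": uint8_t" as in the diffs below.
enum class type_old { error, string };
enum class error_old { none };
enum class type_new : uint8_t { error, string };
enum class error_new : uint8_t { none };

// Roughly the old layout: size_t offsets plus int-sized enums.
struct tok_old {
    size_t offset{0};
    size_t length{0};
    size_t error_offset_within_token{static_cast<size_t>(-1)};
    type_old type{type_old::error};
    error_old error{error_old::none};
};

// Roughly the new layout: uint32_t offsets plus uint8_t enums, with the
// one-byte fields grouped together so padding stays small.
struct tok_new {
    uint32_t offset{0};
    uint32_t length{0};
    uint32_t error_offset_within_token{UINT32_MAX};
    error_new error{error_new::none};
    type_new type{type_new::error};
};

int main() {
    // Typically prints "old: 32 bytes, new: 16 bytes" on LP64 platforms.
    std::printf("old: %zu bytes, new: %zu bytes\n", sizeof(tok_old), sizeof(tok_new));
}

Grouping the two one-byte fields together, as the last hunk below does by
moving the type field next to the error field, is what keeps the padding
down to two bytes.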

@@ -1674,10 +1674,7 @@ void completer_t::perform_for_commandline(wcstring cmdline) {
     custom_arg_data_t arg_data{&var_assignments};
     arg_data.had_ddash = had_ddash;
-    assert(cmd_tok.offset < std::numeric_limits<uint32_t>::max());
-    assert(cmd_tok.length < std::numeric_limits<uint32_t>::max());
-    source_range_t command_range = {static_cast<uint32_t>(cmd_tok.offset),
-                                    static_cast<uint32_t>(cmd_tok.length)};
+    source_range_t command_range = {cmd_tok.offset, cmd_tok.length};
     wcstring exp_command = cmd_tok.get_source(cmdline);
     bool unescaped =

@@ -7,6 +7,9 @@
 #include "common.h"
 #include "enum_map.h"
 
+using source_offset_t = uint32_t;
+constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
+
 #define PARSER_DIE()                   \
     do {                               \
         FLOG(error, L"Parser dying!"); \
@@ -15,10 +18,10 @@
 // A range of source code.
 struct source_range_t {
-    uint32_t start;
-    uint32_t length;
+    source_offset_t start;
+    source_offset_t length;
 
-    uint32_t end() const {
+    source_offset_t end() const {
         assert(start + length >= start && "Overflow");
         return start + length;
     }

@@ -16,10 +16,6 @@
 #include "parse_constants.h"
 #include "tokenizer.h"
 
-typedef uint32_t source_offset_t;
-constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast<source_offset_t>(-1);
-
 /// A struct representing the token type that we use internally.
 struct parse_token_t {
     enum parse_token_type_t type;  // The type of the token as represented by the parser

@@ -10,8 +10,8 @@
 #include "parse_constants.h"
 #include "redirection.h"
 
-/// Token types.
-enum class token_type_t {
+/// Token types. XXX Why this isn't parse_token_type_t, I'm not really sure.
+enum class token_type_t : uint8_t {
     error,   /// Error reading token
     string,  /// String token
     pipe,    /// Pipe token
@@ -39,7 +39,7 @@ enum class token_type_t {
 using tok_flags_t = unsigned int;
 
-enum class tokenizer_error_t {
+enum class tokenizer_error_t : uint8_t {
     none,
     unterminated_quote,
     unterminated_subshell,
@@ -61,20 +61,20 @@ const wchar_t *tokenizer_get_error_message(tokenizer_error_t err);
 struct tok_t {
     // Offset of the token.
-    size_t offset{0};
+    source_offset_t offset{0};
     // Length of the token.
-    size_t length{0};
+    source_offset_t length{0};
 
     // If an error, this is the offset of the error within the token. A value of 0 means it occurred
     // at 'offset'.
-    size_t error_offset_within_token{size_t(-1)};
-    // The type of the token.
-    token_type_t type;
+    source_offset_t error_offset_within_token{SOURCE_OFFSET_INVALID};
 
     // If an error, this is the error code.
     tokenizer_error_t error{tokenizer_error_t::none};
 
+    // The type of the token.
+    token_type_t type;
+
     // Construct from a token type.
     explicit tok_t(token_type_t type);