Space and time optimizations for parse_node_t. Reduced size from 48 bytes to 20 bytes.
This commit is contained in:
ridiculousfish 2014-03-25 20:06:34 -07:00
parent b520a03c57
commit 9fece3fdf1
5 changed files with 63 additions and 48 deletions

View file

@ -218,7 +218,9 @@
#if __GNUC__ >= 3 #if __GNUC__ >= 3
#define __warn_unused __attribute__ ((warn_unused_result)) #define __warn_unused __attribute__ ((warn_unused_result))
#define __sentinel __attribute__ ((sentinel)) #define __sentinel __attribute__ ((sentinel))
#define __packed __attribute__ ((packed))
#else #else
#define __warn_unused #define __warn_unused
#define __sentinel #define __sentinel
#define __packed
#endif #endif

View file

@ -70,7 +70,7 @@ enum parse_token_type_t
LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate, LAST_TOKEN_OR_SYMBOL = parse_token_type_terminate,
FIRST_PARSE_TOKEN_TYPE = parse_token_type_string FIRST_PARSE_TOKEN_TYPE = parse_token_type_string
}; } __packed;
enum parse_keyword_t enum parse_keyword_t
{ {
@ -93,7 +93,7 @@ enum parse_keyword_t
parse_keyword_exec, parse_keyword_exec,
LAST_KEYWORD = parse_keyword_builtin LAST_KEYWORD = parse_keyword_builtin
}; } __packed;
/* Statement decorations. This matches the order of productions in decorated_statement */ /* Statement decorations. This matches the order of productions in decorated_statement */
enum parse_statement_decoration_t enum parse_statement_decoration_t

View file

@ -72,7 +72,8 @@ node_offset_t parse_execution_context_t::get_offset(const parse_node_t &node) co
const parse_node_t *addr = &node; const parse_node_t *addr = &node;
const parse_node_t *base = &this->tree.at(0); const parse_node_t *base = &this->tree.at(0);
assert(addr >= base); assert(addr >= base);
node_offset_t offset = addr - base; assert(addr - base < SOURCE_OFFSET_INVALID);
node_offset_t offset = static_cast<node_offset_t>(addr - base);
assert(offset < this->tree.size()); assert(offset < this->tree.size());
assert(&tree.at(offset) == &node); assert(&tree.at(offset) == &node);
return offset; return offset;

View file

@ -13,6 +13,13 @@ static bool production_is_empty(const production_t *production)
return (*production)[0] == token_type_invalid; return (*production)[0] == token_type_invalid;
} }
/** Exchanges the contents of two parse trees.
    \param a First tree; holds the former contents of b on return.
    \param b Second tree; holds the former contents of a on return.
*/
void swap2(parse_node_tree_t &a, parse_node_tree_t &b)
{
    // Delegate to the member swap, which is the base vector implementation.
    // The unconditional "Swapping!" debug print that used to precede this was
    // removed so that swapping trees no longer writes noise to stderr.
    a.swap(b);
}
/** Returns a string description of this parse error */ /** Returns a string description of this parse error */
wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring &prefix, bool skip_caret) const wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring &prefix, bool skip_caret) const
{ {
@ -404,7 +411,7 @@ static void dump_tree_recursive(const parse_node_tree_t &nodes, const wcstring &
result->push_back(L'\n'); result->push_back(L'\n');
++*line; ++*line;
for (size_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++) for (node_offset_t child_idx = node.child_start; child_idx < node.child_start + node.child_count; child_idx++)
{ {
dump_tree_recursive(nodes, src, child_idx, indent + 1, result, line, inout_first_node_not_dumped); dump_tree_recursive(nodes, src, child_idx, indent + 1, result, line, inout_first_node_not_dumped);
} }
@ -529,28 +536,31 @@ class parse_ll_t
} }
// Get the parent index. But we can't get the parent parse node yet, since it may be made invalid by adding children // Get the parent index. But we can't get the parent parse node yet, since it may be made invalid by adding children
const size_t parent_node_idx = symbol_stack.back().node_idx; const node_offset_t parent_node_idx = symbol_stack.back().node_idx;
// Add the children. Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack) // Add the children. Confusingly, we want our nodes to be in forwards order (last token last, so dumps look nice), but the symbols should be reverse order (last token first, so it's lowest on the stack)
const size_t child_start = nodes.size(); const size_t child_start_big = nodes.size();
size_t child_count = 0; assert(child_start_big < NODE_OFFSET_INVALID);
node_offset_t child_start = static_cast<node_offset_t>(child_start_big);
// To avoid constructing multiple nodes, we push_back a single one that we modify
parse_node_t representative_child(token_type_invalid);
representative_child.parent = parent_node_idx;
node_offset_t child_count = 0;
for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++) for (size_t i=0; i < MAX_SYMBOLS_PER_PRODUCTION; i++)
{ {
production_element_t elem = (*production)[i]; production_element_t elem = (*production)[i];
if (!production_element_is_valid(elem)) if (! production_element_is_valid(elem))
{ {
// All done, bail out // All done, bail out
break; break;
} }
else
{ // Append the parse node.
// Generate the parse node. representative_child.type = production_element_type(elem);
parse_token_type_t child_type = production_element_type(elem); nodes.push_back(representative_child);
parse_node_t child = parse_node_t(child_type); child_count++;
child.parent = parent_node_idx;
nodes.push_back(child);
child_count++;
}
} }
// Update the parent // Update the parent
@ -566,7 +576,7 @@ class parse_ll_t
// Replace the top of the stack with new stack elements corresponding to our new nodes. Note that these go in reverse order. // Replace the top of the stack with new stack elements corresponding to our new nodes. Note that these go in reverse order.
symbol_stack.pop_back(); symbol_stack.pop_back();
symbol_stack.reserve(symbol_stack.size() + child_count); symbol_stack.reserve(symbol_stack.size() + child_count);
size_t idx = child_count; node_offset_t idx = child_count;
while (idx--) while (idx--)
{ {
production_element_t elem = (*production)[idx]; production_element_t elem = (*production)[idx];
@ -652,18 +662,17 @@ void parse_ll_t::dump_stack(void) const
// Since children always appear after their parents, we can implement this very simply by walking backwards // Since children always appear after their parents, we can implement this very simply by walking backwards
void parse_ll_t::determine_node_ranges(void) void parse_ll_t::determine_node_ranges(void)
{ {
const size_t source_start_invalid = -1;
size_t idx = nodes.size(); size_t idx = nodes.size();
while (idx--) while (idx--)
{ {
parse_node_t *parent = &nodes.at(idx); parse_node_t *parent = &nodes[idx];
// Skip nodes that already have a source range. These are terminal nodes. // Skip nodes that already have a source range. These are terminal nodes.
if (parent->source_start != source_start_invalid) if (parent->source_start != SOURCE_OFFSET_INVALID)
continue; continue;
// Ok, this node needs a source range. Get all of its children, and then set its range. // Ok, this node needs a source range. Get all of its children, and then set its range.
size_t min_start = source_start_invalid, max_end = 0; //note source_start_invalid is huge source_offset_t min_start = SOURCE_OFFSET_INVALID, max_end = 0; //note SOURCE_OFFSET_INVALID is huge
for (node_offset_t i=0; i < parent->child_count; i++) for (node_offset_t i=0; i < parent->child_count; i++)
{ {
const parse_node_t &child = nodes.at(parent->child_offset(i)); const parse_node_t &child = nodes.at(parent->child_offset(i));
@ -674,7 +683,7 @@ void parse_ll_t::determine_node_ranges(void)
} }
} }
if (min_start != source_start_invalid) if (min_start != SOURCE_OFFSET_INVALID)
{ {
assert(max_end >= min_start); assert(max_end >= min_start);
parent->source_start = min_start; parent->source_start = min_start;
@ -831,7 +840,7 @@ void parse_ll_t::parse_error(const wchar_t *expected, parse_token_t token)
void parse_ll_t::reset_symbols(enum parse_token_type_t goal) void parse_ll_t::reset_symbols(enum parse_token_type_t goal)
{ {
/* Add a new goal node, and then reset our symbol list to point at it */ /* Add a new goal node, and then reset our symbol list to point at it */
node_offset_t where = nodes.size(); node_offset_t where = static_cast<node_offset_t>(nodes.size());
nodes.push_back(parse_node_t(goal)); nodes.push_back(parse_node_t(goal));
symbol_stack.clear(); symbol_stack.clear();
@ -1047,14 +1056,10 @@ static parse_keyword_t keyword_for_token(token_type tok, const wchar_t *tok_txt)
} }
/* Placeholder invalid token */ /* Placeholder invalid token */
static const parse_token_t kInvalidToken = {token_type_invalid, static const parse_token_t kInvalidToken = {token_type_invalid, parse_keyword_none, false, false, SOURCE_OFFSET_INVALID, 0};
parse_keyword_none, false, false, static_cast<size_t>(-1),
static_cast<size_t>(-1)};
/* Terminal token */ /* Terminal token */
static const parse_token_t kTerminalToken = {parse_token_type_terminate, static const parse_token_t kTerminalToken = {parse_token_type_terminate, parse_keyword_none, false, false, SOURCE_OFFSET_INVALID, 0};
parse_keyword_none, false, false, static_cast<size_t>(-1),
static_cast<size_t>(-1)};
static inline bool is_help_argument(const wchar_t *txt) static inline bool is_help_argument(const wchar_t *txt)
{ {
@ -1082,8 +1087,8 @@ static inline parse_token_t next_parse_token(tokenizer_t *tok)
result.keyword = keyword_for_token(tok_type, tok_txt); result.keyword = keyword_for_token(tok_type, tok_txt);
result.has_dash_prefix = (tok_txt[0] == L'-'); result.has_dash_prefix = (tok_txt[0] == L'-');
result.is_help_argument = result.has_dash_prefix && is_help_argument(tok_txt); result.is_help_argument = result.has_dash_prefix && is_help_argument(tok_txt);
result.source_start = (size_t)tok_start; result.source_start = (source_offset_t)tok_start;
result.source_length = tok_extent; result.source_length = (source_offset_t)tok_extent;
tok_next(tok); tok_next(tok);
return result; return result;
@ -1195,7 +1200,7 @@ const parse_node_t *parse_node_tree_t::get_child(const parse_node_t &parent, nod
const parse_node_t &parse_node_tree_t::find_child(const parse_node_t &parent, parse_token_type_t type) const const parse_node_t &parse_node_tree_t::find_child(const parse_node_t &parent, parse_token_type_t type) const
{ {
for (size_t i=0; i < parent.child_count; i++) for (node_offset_t i=0; i < parent.child_count; i++)
{ {
const parse_node_t *child = this->get_child(parent, i); const parse_node_t *child = this->get_child(parent, i);
if (child->type == type) if (child->type == type)
@ -1241,7 +1246,7 @@ static void find_nodes_recursive(const parse_node_tree_t &tree, const parse_node
if (result->size() < max_count) if (result->size() < max_count)
{ {
if (parent.type == type) result->push_back(&parent); if (parent.type == type) result->push_back(&parent);
for (size_t i=0; i < parent.child_count; i++) for (node_offset_t i=0; i < parent.child_count; i++)
{ {
const parse_node_t *child = tree.get_child(parent, i); const parse_node_t *child = tree.get_child(parent, i);
assert(child != NULL); assert(child != NULL);
@ -1478,7 +1483,7 @@ const parse_node_t *parse_node_tree_t::next_node_in_node_list(const parse_node_t
const parse_node_t *next_cursor = NULL; const parse_node_t *next_cursor = NULL;
/* Walk through the children */ /* Walk through the children */
for (size_t i=0; i < list_cursor->child_count; i++) for (node_offset_t i=0; i < list_cursor->child_count; i++)
{ {
const parse_node_t *child = this->get_child(*list_cursor, i); const parse_node_t *child = this->get_child(*list_cursor, i);
if (child->type == entry_type) if (child->type == entry_type)

View file

@ -18,9 +18,15 @@
class parse_node_t; class parse_node_t;
class parse_node_tree_t; class parse_node_tree_t;
typedef size_t node_offset_t;
typedef uint32_t node_offset_t;
#define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1)) #define NODE_OFFSET_INVALID (static_cast<node_offset_t>(-1))
typedef uint32_t source_offset_t;
#define SOURCE_OFFSET_INVALID (static_cast<source_offset_t>(-1))
struct parse_error_t struct parse_error_t
{ {
/** Text of the error */ /** Text of the error */
@ -51,8 +57,8 @@ struct parse_token_t
enum parse_keyword_t keyword; // Any keyword represented by this token enum parse_keyword_t keyword; // Any keyword represented by this token
bool has_dash_prefix; // Hackish: whether the source contains a dash prefix bool has_dash_prefix; // Hackish: whether the source contains a dash prefix
bool is_help_argument; // Hackish: whether the source looks like '-h' or '--help' bool is_help_argument; // Hackish: whether the source looks like '-h' or '--help'
size_t source_start; source_offset_t source_start;
size_t source_length; source_offset_t source_length;
wcstring describe() const; wcstring describe() const;
wcstring user_presentable_description() const; wcstring user_presentable_description() const;
@ -83,35 +89,36 @@ wcstring parse_dump_tree(const parse_node_tree_t &tree, const wcstring &src);
wcstring token_type_description(parse_token_type_t type); wcstring token_type_description(parse_token_type_t type);
wcstring keyword_description(parse_keyword_t type); wcstring keyword_description(parse_keyword_t type);
/** Class for nodes of a parse tree */ /** Class for nodes of a parse tree. Since there's a lot of these, the size and order of the fields is important. */
class parse_node_t class parse_node_t
{ {
public: public:
/* Type of the node */
enum parse_token_type_t type;
/* Start in the source code */ /* Start in the source code */
size_t source_start; source_offset_t source_start;
/* Length of our range in the source code */ /* Length of our range in the source code */
size_t source_length; source_offset_t source_length;
/* Parent */ /* Parent */
node_offset_t parent; node_offset_t parent;
/* Children */ /* Children */
node_offset_t child_start; node_offset_t child_start;
/* Number of children */
uint8_t child_count; uint8_t child_count;
/* Which production was used */ /* Which production was used */
uint8_t production_idx; uint8_t production_idx;
/* Type of the node */
enum parse_token_type_t type;
/* Description */ /* Description */
wcstring describe(void) const; wcstring describe(void) const;
/* Constructor */ /* Constructor */
explicit parse_node_t(parse_token_type_t ty) : type(ty), source_start(-1), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), production_idx(-1) explicit parse_node_t(parse_token_type_t ty) : source_start(SOURCE_OFFSET_INVALID), source_length(0), parent(NODE_OFFSET_INVALID), child_start(0), child_count(0), production_idx(-1), type(ty)
{ {
} }
@ -124,7 +131,7 @@ public:
/* Indicate if this node has a range of source code associated with it */ /* Indicate if this node has a range of source code associated with it */
bool has_source() const bool has_source() const
{ {
return source_start != (size_t)(-1); return source_start != SOURCE_OFFSET_INVALID;
} }
/* Gets source for the node, or the empty string if it has no source */ /* Gets source for the node, or the empty string if it has no source */
@ -143,7 +150,6 @@ public:
} }
}; };
/* The parse tree itself */ /* The parse tree itself */
class parse_node_tree_t : public std::vector<parse_node_t> class parse_node_tree_t : public std::vector<parse_node_t>
{ {
@ -200,6 +206,7 @@ public:
parse_node_list_t specific_statements_for_job(const parse_node_t &job) const; parse_node_list_t specific_statements_for_job(const parse_node_t &job) const;
}; };
/* The big entry point. Parse a string, attempting to produce a tree for the given goal type */ /* The big entry point. Parse a string, attempting to produce a tree for the given goal type */
bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, parse_token_type_t goal = symbol_job_list); bool parse_tree_from_string(const wcstring &str, parse_tree_flags_t flags, parse_node_tree_t *output, parse_error_list_t *errors, parse_token_type_t goal = symbol_job_list);