Cleanup of LRU cache implementation

Switch to CRTP from virtual functions and improve ownership semantics.
It's no longer necessary for clients to use new and delete.
This commit is contained in:
ridiculousfish 2017-01-27 12:18:16 -08:00
parent 61f272495d
commit e52a04e341
5 changed files with 191 additions and 187 deletions

View file

@ -57,13 +57,12 @@ autoload_t::autoload_t(const wcstring &env_var_name_var, const builtin_script_t
autoload_t::~autoload_t() { pthread_mutex_destroy(&lock); } autoload_t::~autoload_t() { pthread_mutex_destroy(&lock); }
void autoload_t::node_was_evicted(autoload_function_t *node) { void autoload_t::entry_was_evicted(wcstring key, autoload_function_t node) {
// This should only ever happen on the main thread. // This should only ever happen on the main thread.
ASSERT_IS_MAIN_THREAD(); ASSERT_IS_MAIN_THREAD();
// Tell ourselves that the command was removed if it was loaded. // Tell ourselves that the command was removed if it was loaded.
if (node->is_loaded) this->command_removed(node->key); if (node.is_loaded) this->command_removed(std::move(key));
delete node;
} }
int autoload_t::unload(const wcstring &cmd) { return this->evict_node(cmd); } int autoload_t::unload(const wcstring &cmd) { return this->evict_node(cmd); }
@ -130,7 +129,7 @@ static bool script_name_precedes_script_name(const builtin_script_t &script1,
/// Check whether the given command is loaded. /// Check whether the given command is loaded.
bool autoload_t::has_tried_loading(const wcstring &cmd) { bool autoload_t::has_tried_loading(const wcstring &cmd) {
scoped_lock locker(lock); scoped_lock locker(lock);
autoload_function_t *func = this->get_node(cmd); autoload_function_t *func = this->get(cmd);
return func != NULL; return func != NULL;
} }
@ -144,14 +143,16 @@ static bool is_stale(const autoload_function_t *func) {
autoload_function_t *autoload_t::get_autoloaded_function_with_creation(const wcstring &cmd, autoload_function_t *autoload_t::get_autoloaded_function_with_creation(const wcstring &cmd,
bool allow_eviction) { bool allow_eviction) {
ASSERT_IS_LOCKED(lock); ASSERT_IS_LOCKED(lock);
autoload_function_t *func = this->get_node(cmd); autoload_function_t *func = this->get(cmd);
if (!func) { if (!func) {
func = new autoload_function_t(cmd); bool added;
if (allow_eviction) { if (allow_eviction) {
this->add_node(func); added = this->insert(cmd, autoload_function_t(false));
} else { } else {
this->add_node_without_eviction(func); added = this->insert_no_eviction(cmd, autoload_function_t(false));
} }
func = this->get(cmd);
assert(func);
} }
return func; return func;
} }
@ -188,7 +189,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_
{ {
bool allow_stale_functions = !reload; bool allow_stale_functions = !reload;
scoped_lock locker(lock); scoped_lock locker(lock);
autoload_function_t *func = this->get_node(cmd); // get the function autoload_function_t *func = this->get(cmd); // get the function
// If we can use this function, return whether we were able to access it. // If we can use this function, return whether we were able to access it.
if (use_cached(func, really_load, allow_stale_functions)) { if (use_cached(func, really_load, allow_stale_functions)) {
@ -243,7 +244,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_
// Now we're actually going to take the lock. // Now we're actually going to take the lock.
scoped_lock locker(lock); scoped_lock locker(lock);
autoload_function_t *func = this->get_node(cmd); autoload_function_t *func = this->get(cmd);
// Generate the source if we need to load it. // Generate the source if we need to load it.
bool need_to_load_function = bool need_to_load_function =
@ -286,15 +287,15 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_
if (!found_file && !has_script_source) { if (!found_file && !has_script_source) {
scoped_lock locker(lock); scoped_lock locker(lock);
// Generate a placeholder. // Generate a placeholder.
autoload_function_t *func = this->get_node(cmd); autoload_function_t *func = this->get(cmd);
if (!func) { if (!func) {
func = new autoload_function_t(cmd);
func->is_placeholder = true;
if (really_load) { if (really_load) {
this->add_node(func); this->insert(cmd, autoload_function_t(true));
} else { } else {
this->add_node_without_eviction(func); this->insert(cmd, autoload_function_t(true));
} }
func = this->get(cmd);
assert(func);
} }
func->access.last_checked = time(NULL); func->access.last_checked = time(NULL);
} }

View file

@ -25,12 +25,11 @@ struct file_access_attempt_t {
}; };
file_access_attempt_t access_file(const wcstring &path, int mode); file_access_attempt_t access_file(const wcstring &path, int mode);
struct autoload_function_t : public lru_node_t { struct autoload_function_t {
explicit autoload_function_t(const wcstring &key) explicit autoload_function_t(bool placeholder)
: lru_node_t(key), : access(),
access(),
is_loaded(false), is_loaded(false),
is_placeholder(false), is_placeholder(placeholder),
is_internalized(false) {} is_internalized(false) {}
/// The last access attempt recorded /// The last access attempt recorded
@ -52,7 +51,7 @@ struct builtin_script_t {
class env_vars_snapshot_t; class env_vars_snapshot_t;
/// Class representing a path from which we can autoload and the autoloaded contents. /// Class representing a path from which we can autoload and the autoloaded contents.
class autoload_t : private lru_cache_t<autoload_function_t> { class autoload_t : public lru_cache_t<autoload_t, autoload_function_t> {
private: private:
/// Lock for thread safety. /// Lock for thread safety.
pthread_mutex_t lock; pthread_mutex_t lock;
@ -70,13 +69,11 @@ class autoload_t : private lru_cache_t<autoload_function_t> {
/// This is here to help prevent recursion. /// This is here to help prevent recursion.
std::set<wcstring> is_loading_set; std::set<wcstring> is_loading_set;
void remove_all_functions(void) { this->evict_all_nodes(); } void remove_all_functions() { this->evict_all_nodes(); }
bool locate_file_and_maybe_load_it(const wcstring &cmd, bool really_load, bool reload, bool locate_file_and_maybe_load_it(const wcstring &cmd, bool really_load, bool reload,
const wcstring_list_t &path_list); const wcstring_list_t &path_list);
virtual void node_was_evicted(autoload_function_t *node);
autoload_function_t *get_autoloaded_function_with_creation(const wcstring &cmd, autoload_function_t *get_autoloaded_function_with_creation(const wcstring &cmd,
bool allow_eviction); bool allow_eviction);
@ -85,7 +82,11 @@ class autoload_t : private lru_cache_t<autoload_function_t> {
virtual void command_removed(const wcstring &cmd) { UNUSED(cmd); } virtual void command_removed(const wcstring &cmd) { UNUSED(cmd); }
public: public:
/// Create an autoload_t for the given environment variable name.
// CRTP override
void entry_was_evicted(wcstring key, autoload_function_t node);
// Create an autoload_t for the given environment variable name.
autoload_t(const wcstring &env_var_name_var, const builtin_script_t *scripts, autoload_t(const wcstring &env_var_name_var, const builtin_script_t *scripts,
size_t script_count); size_t script_count);

View file

@ -1122,20 +1122,14 @@ static void test_escape_sequences(void) {
err(L"test_escape_sequences failed on line %d\n", __LINE__); err(L"test_escape_sequences failed on line %d\n", __LINE__);
} }
class lru_node_test_t : public lru_node_t { class test_lru_t : public lru_cache_t<test_lru_t, int> {
public: public:
explicit lru_node_test_t(const wcstring &tmp) : lru_node_t(tmp) {} test_lru_t() : lru_cache_t<test_lru_t, int>(16) {}
};
class test_lru_t : public lru_cache_t<lru_node_test_t> { std::vector<std::pair<wcstring, int>> evicted;
public:
test_lru_t() : lru_cache_t<lru_node_test_t>(16) {}
std::vector<lru_node_test_t *> evicted_nodes; void entry_was_evicted(wcstring key, int val) {
evicted.push_back({key, val});
virtual void node_was_evicted(lru_node_test_t *node) {
do_test(find(evicted_nodes.begin(), evicted_nodes.end(), node) == evicted_nodes.end());
evicted_nodes.push_back(node);
} }
}; };
@ -1143,24 +1137,18 @@ static void test_lru(void) {
say(L"Testing LRU cache"); say(L"Testing LRU cache");
test_lru_t cache; test_lru_t cache;
std::vector<lru_node_test_t *> expected_evicted; std::vector<std::pair<wcstring, int>> expected_evicted;
size_t total_nodes = 20; int total_nodes = 20;
for (size_t i = 0; i < total_nodes; i++) { for (int i = 0; i < total_nodes; i++) {
do_test(cache.size() == std::min(i, (size_t)16)); do_test(cache.size() == size_t(std::min(i, 16)));
lru_node_test_t *node = new lru_node_test_t(to_string(i)); if (i < 4) expected_evicted.push_back({to_string(i), i});
if (i < 4) expected_evicted.push_back(node);
// Adding the node the first time should work, and subsequent times should fail. // Adding the node the first time should work, and subsequent times should fail.
do_test(cache.add_node(node)); do_test(cache.insert(to_string(i), i));
do_test(!cache.add_node(node)); do_test(!cache.insert(to_string(i), i+1));
} }
do_test(cache.evicted_nodes == expected_evicted); do_test(cache.evicted == expected_evicted);
cache.evict_all_nodes(); cache.evict_all_nodes();
do_test(cache.evicted_nodes.size() == total_nodes); do_test(cache.evicted.size() == size_t(total_nodes));
while (!cache.evicted_nodes.empty()) {
lru_node_t *node = cache.evicted_nodes.back();
cache.evicted_nodes.pop_back();
delete node;
}
} }
/// Perform parameter expansion and test if the output equals the zero-terminated parameter list /// Perform parameter expansion and test if the output equals the zero-terminated parameter list

View file

@ -142,23 +142,22 @@ static bool history_file_lock(int fd, int lock_type) {
/// Our LRU cache is used for restricting the amount of history we have, and limiting how long we /// Our LRU cache is used for restricting the amount of history we have, and limiting how long we
/// order it. /// order it.
class history_lru_node_t : public lru_node_t { class history_lru_item_t {
public: public:
wcstring text;
time_t timestamp; time_t timestamp;
path_list_t required_paths; path_list_t required_paths;
explicit history_lru_node_t(const history_item_t &item) explicit history_lru_item_t(const history_item_t &item)
: lru_node_t(item.str()), : text(item.str()),
timestamp(item.timestamp()), timestamp(item.timestamp()),
required_paths(item.get_required_paths()) {} required_paths(item.get_required_paths()) {}
}; };
class history_lru_cache_t : public lru_cache_t<history_lru_node_t> { class history_lru_cache_t : public lru_cache_t<history_lru_cache_t, history_lru_item_t> {
protected: typedef lru_cache_t<history_lru_cache_t, history_lru_item_t> super;
/// Override to delete evicted nodes.
virtual void node_was_evicted(history_lru_node_t *node) { delete node; }
public: public:
explicit history_lru_cache_t(size_t max) : lru_cache_t<history_lru_node_t>(max) {}
using super::super;
/// Function to add a history item. /// Function to add a history item.
void add_item(const history_item_t &item) { void add_item(const history_item_t &item) {
@ -167,10 +166,10 @@ class history_lru_cache_t : public lru_cache_t<history_lru_node_t> {
// See if it's in the cache. If it is, update the timestamp. If not, we create a new node // See if it's in the cache. If it is, update the timestamp. If not, we create a new node
// and add it. Note that calling get_node promotes the node to the front. // and add it. Note that calling get_node promotes the node to the front.
history_lru_node_t *node = this->get_node(item.str()); wcstring key = item.str();
history_lru_item_t *node = this->get(key);
if (node == NULL) { if (node == NULL) {
node = new history_lru_node_t(item); this->insert(std::move(key), history_lru_item_t(item));
this->add_node(node);
} else { } else {
node->timestamp = std::max(node->timestamp, item.timestamp()); node->timestamp = std::max(node->timestamp, item.timestamp());
// What to do about paths here? Let's just ignore them. // What to do about paths here? Let's just ignore them.
@ -1248,8 +1247,9 @@ bool history_t::save_internal_via_rewrite() {
bool errored = false; bool errored = false;
history_output_buffer_t buffer; history_output_buffer_t buffer;
for (history_lru_cache_t::iterator iter = lru.begin(); iter != lru.end(); ++iter) { for (history_lru_cache_t::iterator iter = lru.begin(); iter != lru.end(); ++iter) {
const history_lru_node_t *node = *iter; const wcstring &text = (*iter).first;
append_yaml_to_buffer(node->key, node->timestamp, node->required_paths, &buffer); const history_lru_item_t &item = (*iter).second;
append_yaml_to_buffer(text, item.timestamp, item.required_paths, &buffer);
if (buffer.output_size() >= HISTORY_OUTPUT_BUFFER_SIZE && if (buffer.output_size() >= HISTORY_OUTPUT_BUFFER_SIZE &&
!buffer.flush_to_fd(out_fd)) { !buffer.flush_to_fd(out_fd)) {
errored = true; errored = true;

258
src/lru.h
View file

@ -4,190 +4,204 @@
#include <assert.h> #include <assert.h>
#include <wchar.h> #include <wchar.h>
#include <list>
#include <map> #include <map>
#include <set>
#include "common.h" #include "common.h"
/// A predicate to compare dereferenced pointers. // Least-recently-used cache class
struct dereference_less_t { // This a map from wcstring to CONTENTS, that will evict entries when the count exceeds the maximum.
template <typename ptr_t> // It uses CRTP to inform clients when entries are evicted. This uses the classic LRU cache structure:
bool operator()(ptr_t p1, ptr_t p2) const { // a dictionary mapping keys to nodes, where the nodes also form a linked list. Our linked list is
return *p1 < *p2; // circular and has a sentinel node (the "mouth" - picture a snake swallowing its tail). This simplifies
} // the logic: no pointer is ever NULL! It also works well with C++'s iterator since the sentinel node
}; // is a natural value for end(). Our nodes also have the unusual property of having a "back pointer":
// they store an iterator to the entry in the map containing the node. This allows us, given a node, to
class lru_node_t { // immediately locate the node and its key in the dictionary. This allows us to avoid duplicating the key
template <class T> // in the node.
friend class lru_cache_t; template <class DERIVED, class CONTENTS>
/// Our linked list pointer.
lru_node_t *prev, *next;
public:
/// The key used to look up in the cache.
const wcstring key;
/// Constructor.
explicit lru_node_t(const wcstring &pkey) : prev(NULL), next(NULL), key(pkey) {}
/// Virtual destructor that does nothing for classes that inherit lru_node_t.
virtual ~lru_node_t() {}
/// operator< for std::set
bool operator<(const lru_node_t &other) const { return key < other.key; }
};
template <class node_type_t>
class lru_cache_t { class lru_cache_t {
private:
/// Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is struct lru_node_t;
/// used from background threads. typedef typename std::map<wcstring, lru_node_t>::iterator node_iter_t;
struct lru_link_t {
// Our doubly linked list
// The base class is used for the mouth only
lru_link_t *prev = NULL;
lru_link_t *next = NULL;
};
// The node type in our LRU cache
struct lru_node_t : public lru_link_t {
// No copying
lru_node_t(const lru_node_t &) = delete;
lru_node_t &operator=(const lru_node_t &) = delete;
lru_node_t(lru_node_t &&) = default;
// Our location in the map!
node_iter_t iter;
// The value from the client
CONTENTS value;
explicit lru_node_t(CONTENTS v) :
value(std::move(v))
{}
};
// Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is
// used from background threads.
const size_t max_node_count; const size_t max_node_count;
/// Count of nodes. // All of our nodes
size_t node_count; // Note that our linked list contains pointers to these nodes in the map
// We are dependent on the iterator-noninvalidation guarantees of std::map
std::map<wcstring, lru_node_t> node_map;
/// The set of nodes. // Head of the linked list
typedef std::set<lru_node_t *, dereference_less_t> node_set_t; // The list is circular!
node_set_t node_set; // If "empty" the mouth just points at itself.
lru_link_t mouth;
void promote_node(node_type_t *node) { // Take a node and move it to the front of the list
// We should never promote the mouth. void promote_node(lru_node_t *node) {
assert(node != &mouth); assert(node != &mouth);
// First unhook us. // First unhook us
node->prev->next = node->next; node->prev->next = node->next;
node->next->prev = node->prev; node->next->prev = node->prev;
// Put us after the mouth. // Put us after the mouth
node->next = mouth.next; node->next = mouth.next;
node->next->prev = node; node->next->prev = node;
node->prev = &mouth; node->prev = &mouth;
mouth.next = node; mouth.next = node;
} }
void evict_node(node_type_t *condemned_node) { // Remove the node
void evict_node(lru_node_t *node) {
assert(node != &mouth);
// We should never evict the mouth. // We should never evict the mouth.
assert(condemned_node != NULL && condemned_node != &mouth); assert(node != NULL && node->iter != this->node_map.end());
// Remove it from the linked list. // Remove it from the linked list.
condemned_node->prev->next = condemned_node->next; node->prev->next = node->next;
condemned_node->next->prev = condemned_node->prev; node->next->prev = node->prev;
// Remove us from the set. // Pull out our key and value
node_set.erase(condemned_node); wcstring key = std::move(node->iter->first);
node_count--; CONTENTS value(std::move(node->value));
// Tell ourselves. // Remove us from the map. This deallocates node!
this->node_was_evicted(condemned_node); node_map.erase(node->iter);
node = NULL;
// Tell ourselves what we did
DERIVED *dthis = static_cast<DERIVED *>(this);
dthis->entry_was_evicted(std::move(key), std::move(value));
} }
void evict_last_node(void) { evict_node((node_type_t *)mouth.prev); } // Evicts the last node
void evict_last_node() {
assert(mouth.prev != &mouth);
evict_node(static_cast<lru_node_t *>(mouth.prev));
}
static lru_node_t *get_previous(lru_node_t *node) { return node->prev; } // CRTP callback for when a node is evicted.
// Clients can implement this
protected: void entry_was_evicted(wcstring key, CONTENTS value) {
/// Head of the linked list. USE(key);
lru_node_t mouth; USE(value);
}
/// Overridable callback for when a node is evicted.
virtual void node_was_evicted(node_type_t *node) { UNUSED(node); }
public: public:
/// Constructor // Constructor
explicit lru_cache_t(size_t max_size = 1024) // Note our linked list is always circular!
: max_node_count(max_size), node_count(0), mouth(wcstring()) { explicit lru_cache_t(size_t max_size = 1024) : max_node_count(max_size) {
// Hook up the mouth to itself: a one node circularly linked list! mouth.next = mouth.prev = &mouth;
mouth.prev = mouth.next = &mouth;
} }
/// Note that we do not evict nodes in our destructor (even though they typically need to be // Returns the value for a given key, or NULL.
/// deleted by their creator). // This counts as a "use" and so promotes the node
virtual ~lru_cache_t() {} CONTENTS *get(const wcstring &key) {
auto where = this->node_map.find(key);
/// Returns the node for a given key, or NULL. if (where == this->node_map.end()) {
node_type_t *get_node(const wcstring &key) { // not found
node_type_t *result = NULL; return NULL;
// Construct a fake node as our key.
lru_node_t node_key(key);
// Look for it in the set.
node_set_t::iterator iter = node_set.find(&node_key);
// If we found a node, promote and return it.
if (iter != node_set.end()) {
result = static_cast<node_type_t *>(*iter);
promote_node(result);
} }
return result; promote_node(&where->second);
return &where->second.value;
} }
/// Evicts the node for a given key, returning true if a node was evicted. // Evicts the node for a given key, returning true if a node was evicted.
bool evict_node(const wcstring &key) { bool evict_node(const wcstring &key) {
// Construct a fake node as our key. auto where = this->node_map.find(key);
lru_node_t node_key(key); if (where == this->node_map.end()) return false;
evict_node(&where->second);
// Look for it in the set.
node_set_t::iterator iter = node_set.find(&node_key);
if (iter == node_set.end()) return false;
// Evict the given node.
evict_node(static_cast<node_type_t *>(*iter));
return true; return true;
} }
/// Adds a node under the given key. Returns true if the node was added, false if the node was // Adds a node under the given key. Returns true if the node was added, false if the node was
/// not because a node with that key is already in the set. // not because a node with that key is already in the set.
bool add_node(node_type_t *node) { bool insert(wcstring key, CONTENTS value) {
// Add our node without eviction. if (! this->insert_no_eviction(std::move(key), std::move(value))) {
if (!this->add_node_without_eviction(node)) return false; return false;
}
while (node_count > max_node_count) evict_last_node(); // evict while (this->node_map.size() > max_node_count) {
evict_last_node();
}
return true; return true;
} }
/// Adds a node under the given key without triggering eviction. Returns true if the node was // Adds a node under the given key without triggering eviction. Returns true if the node was
/// added, false if the node was not because a node with that key is already in the set. // added, false if the node was not because a node with that key is already in the set.
bool add_node_without_eviction(node_type_t *node) { bool insert_no_eviction(wcstring key, CONTENTS value) {
assert(node != NULL && node != &mouth);
// Try inserting; return false if it was already in the set. // Try inserting; return false if it was already in the set.
if (!node_set.insert(node).second) return false; auto iter_inserted = this->node_map.emplace(std::move(key), lru_node_t(std::move(value)));
if (! iter_inserted.second) {
// already present
return false;
}
// Tell the node where it is in the map
node_iter_t iter = iter_inserted.first;
lru_node_t *node = &iter->second;
node->iter = iter;
// Add the node after the mouth.
node->next = mouth.next; node->next = mouth.next;
node->next->prev = node; node->next->prev = node;
node->prev = &mouth; node->prev = &mouth;
mouth.next = node; mouth.next = node;
// Update the count. This may push us over the maximum node count.
node_count++;
return true; return true;
} }
/// Counts nodes. // Number of entries
size_t size(void) { return node_count; } size_t size() { return this->node_map.size(); }
/// Evicts all nodes.
void evict_all_nodes(void) { void evict_all_nodes(void) {
while (node_count > 0) { while (this->size() > 0) {
evict_last_node(); evict_last_node();
} }
} }
/// Iterator for walking nodes, from least recently used to most. // Iterator for walking nodes, from least recently used to most.
class iterator { class iterator {
lru_node_t *node; lru_link_t *node;
public: public:
explicit iterator(lru_node_t *val) : node(val) {} typedef std::pair<const wcstring &, const CONTENTS &> value_type;
void operator++() { node = lru_cache_t::get_previous(node); }
explicit iterator(lru_link_t *val) : node(val) {}
void operator++() { node = node->prev; }
bool operator==(const iterator &other) { return node == other.node; } bool operator==(const iterator &other) { return node == other.node; }
bool operator!=(const iterator &other) { return !(*this == other); } bool operator!=(const iterator &other) { return !(*this == other); }
node_type_t *operator*() { return static_cast<node_type_t *>(node); } value_type operator*() const {
const lru_node_t *dnode = static_cast<const lru_node_t *>(node);
const wcstring &key = dnode->iter->first;
return {key, dnode->value};
}
}; };
iterator begin() { return iterator(mouth.prev); } iterator begin() { return iterator(mouth.prev); }