Cleanup of LRU cache implementation

Switch to CRTP from virtual functions and improve ownership semantics.
It's no longer necessary for clients to use new and delete.
This commit is contained in:
ridiculousfish 2017-01-27 12:18:16 -08:00
parent 61f272495d
commit e52a04e341
5 changed files with 191 additions and 187 deletions

View file

@ -57,13 +57,12 @@ autoload_t::autoload_t(const wcstring &env_var_name_var, const builtin_script_t
autoload_t::~autoload_t() { pthread_mutex_destroy(&lock); }
void autoload_t::node_was_evicted(autoload_function_t *node) {
void autoload_t::entry_was_evicted(wcstring key, autoload_function_t node) {
// This should only ever happen on the main thread.
ASSERT_IS_MAIN_THREAD();
// Tell ourselves that the command was removed if it was loaded.
if (node->is_loaded) this->command_removed(node->key);
delete node;
if (node.is_loaded) this->command_removed(std::move(key));
}
int autoload_t::unload(const wcstring &cmd) { return this->evict_node(cmd); }
@ -130,7 +129,7 @@ static bool script_name_precedes_script_name(const builtin_script_t &script1,
/// Check whether the given command is loaded.
bool autoload_t::has_tried_loading(const wcstring &cmd) {
scoped_lock locker(lock);
autoload_function_t *func = this->get_node(cmd);
autoload_function_t *func = this->get(cmd);
return func != NULL;
}
@ -144,14 +143,16 @@ static bool is_stale(const autoload_function_t *func) {
autoload_function_t *autoload_t::get_autoloaded_function_with_creation(const wcstring &cmd,
bool allow_eviction) {
ASSERT_IS_LOCKED(lock);
autoload_function_t *func = this->get_node(cmd);
autoload_function_t *func = this->get(cmd);
if (!func) {
func = new autoload_function_t(cmd);
bool added;
if (allow_eviction) {
this->add_node(func);
added = this->insert(cmd, autoload_function_t(false));
} else {
this->add_node_without_eviction(func);
added = this->insert_no_eviction(cmd, autoload_function_t(false));
}
func = this->get(cmd);
assert(func);
}
return func;
}
@ -188,7 +189,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_
{
bool allow_stale_functions = !reload;
scoped_lock locker(lock);
autoload_function_t *func = this->get_node(cmd); // get the function
autoload_function_t *func = this->get(cmd); // get the function
// If we can use this function, return whether we were able to access it.
if (use_cached(func, really_load, allow_stale_functions)) {
@ -243,7 +244,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_
// Now we're actually going to take the lock.
scoped_lock locker(lock);
autoload_function_t *func = this->get_node(cmd);
autoload_function_t *func = this->get(cmd);
// Generate the source if we need to load it.
bool need_to_load_function =
@ -286,15 +287,15 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_
if (!found_file && !has_script_source) {
scoped_lock locker(lock);
// Generate a placeholder.
autoload_function_t *func = this->get_node(cmd);
autoload_function_t *func = this->get(cmd);
if (!func) {
func = new autoload_function_t(cmd);
func->is_placeholder = true;
if (really_load) {
this->add_node(func);
this->insert(cmd, autoload_function_t(true));
} else {
this->add_node_without_eviction(func);
this->insert(cmd, autoload_function_t(true));
}
func = this->get(cmd);
assert(func);
}
func->access.last_checked = time(NULL);
}

View file

@ -25,12 +25,11 @@ struct file_access_attempt_t {
};
file_access_attempt_t access_file(const wcstring &path, int mode);
struct autoload_function_t : public lru_node_t {
explicit autoload_function_t(const wcstring &key)
: lru_node_t(key),
access(),
struct autoload_function_t {
explicit autoload_function_t(bool placeholder)
: access(),
is_loaded(false),
is_placeholder(false),
is_placeholder(placeholder),
is_internalized(false) {}
/// The last access attempt recorded
@ -52,7 +51,7 @@ struct builtin_script_t {
class env_vars_snapshot_t;
/// Class representing a path from which we can autoload and the autoloaded contents.
class autoload_t : private lru_cache_t<autoload_function_t> {
class autoload_t : public lru_cache_t<autoload_t, autoload_function_t> {
private:
/// Lock for thread safety.
pthread_mutex_t lock;
@ -70,13 +69,11 @@ class autoload_t : private lru_cache_t<autoload_function_t> {
/// This is here to help prevent recursion.
std::set<wcstring> is_loading_set;
void remove_all_functions(void) { this->evict_all_nodes(); }
void remove_all_functions() { this->evict_all_nodes(); }
bool locate_file_and_maybe_load_it(const wcstring &cmd, bool really_load, bool reload,
const wcstring_list_t &path_list);
virtual void node_was_evicted(autoload_function_t *node);
autoload_function_t *get_autoloaded_function_with_creation(const wcstring &cmd,
bool allow_eviction);
@ -85,7 +82,11 @@ class autoload_t : private lru_cache_t<autoload_function_t> {
virtual void command_removed(const wcstring &cmd) { UNUSED(cmd); }
public:
/// Create an autoload_t for the given environment variable name.
// CRTP override
void entry_was_evicted(wcstring key, autoload_function_t node);
// Create an autoload_t for the given environment variable name.
autoload_t(const wcstring &env_var_name_var, const builtin_script_t *scripts,
size_t script_count);

View file

@ -1122,20 +1122,14 @@ static void test_escape_sequences(void) {
err(L"test_escape_sequences failed on line %d\n", __LINE__);
}
class lru_node_test_t : public lru_node_t {
class test_lru_t : public lru_cache_t<test_lru_t, int> {
public:
explicit lru_node_test_t(const wcstring &tmp) : lru_node_t(tmp) {}
};
test_lru_t() : lru_cache_t<test_lru_t, int>(16) {}
class test_lru_t : public lru_cache_t<lru_node_test_t> {
public:
test_lru_t() : lru_cache_t<lru_node_test_t>(16) {}
std::vector<std::pair<wcstring, int>> evicted;
std::vector<lru_node_test_t *> evicted_nodes;
virtual void node_was_evicted(lru_node_test_t *node) {
do_test(find(evicted_nodes.begin(), evicted_nodes.end(), node) == evicted_nodes.end());
evicted_nodes.push_back(node);
void entry_was_evicted(wcstring key, int val) {
evicted.push_back({key, val});
}
};
@ -1143,24 +1137,18 @@ static void test_lru(void) {
say(L"Testing LRU cache");
test_lru_t cache;
std::vector<lru_node_test_t *> expected_evicted;
size_t total_nodes = 20;
for (size_t i = 0; i < total_nodes; i++) {
do_test(cache.size() == std::min(i, (size_t)16));
lru_node_test_t *node = new lru_node_test_t(to_string(i));
if (i < 4) expected_evicted.push_back(node);
std::vector<std::pair<wcstring, int>> expected_evicted;
int total_nodes = 20;
for (int i = 0; i < total_nodes; i++) {
do_test(cache.size() == size_t(std::min(i, 16)));
if (i < 4) expected_evicted.push_back({to_string(i), i});
// Adding the node the first time should work, and subsequent times should fail.
do_test(cache.add_node(node));
do_test(!cache.add_node(node));
do_test(cache.insert(to_string(i), i));
do_test(!cache.insert(to_string(i), i+1));
}
do_test(cache.evicted_nodes == expected_evicted);
do_test(cache.evicted == expected_evicted);
cache.evict_all_nodes();
do_test(cache.evicted_nodes.size() == total_nodes);
while (!cache.evicted_nodes.empty()) {
lru_node_t *node = cache.evicted_nodes.back();
cache.evicted_nodes.pop_back();
delete node;
}
do_test(cache.evicted.size() == size_t(total_nodes));
}
/// Perform parameter expansion and test if the output equals the zero-terminated parameter list

View file

@ -142,23 +142,22 @@ static bool history_file_lock(int fd, int lock_type) {
/// Our LRU cache is used for restricting the amount of history we have, and limiting how long we
/// order it.
class history_lru_node_t : public lru_node_t {
class history_lru_item_t {
public:
wcstring text;
time_t timestamp;
path_list_t required_paths;
explicit history_lru_node_t(const history_item_t &item)
: lru_node_t(item.str()),
explicit history_lru_item_t(const history_item_t &item)
: text(item.str()),
timestamp(item.timestamp()),
required_paths(item.get_required_paths()) {}
};
class history_lru_cache_t : public lru_cache_t<history_lru_node_t> {
protected:
/// Override to delete evicted nodes.
virtual void node_was_evicted(history_lru_node_t *node) { delete node; }
class history_lru_cache_t : public lru_cache_t<history_lru_cache_t, history_lru_item_t> {
typedef lru_cache_t<history_lru_cache_t, history_lru_item_t> super;
public:
explicit history_lru_cache_t(size_t max) : lru_cache_t<history_lru_node_t>(max) {}
using super::super;
/// Function to add a history item.
void add_item(const history_item_t &item) {
@ -167,10 +166,10 @@ class history_lru_cache_t : public lru_cache_t<history_lru_node_t> {
// See if it's in the cache. If it is, update the timestamp. If not, we create a new node
// and add it. Note that calling get_node promotes the node to the front.
history_lru_node_t *node = this->get_node(item.str());
wcstring key = item.str();
history_lru_item_t *node = this->get(key);
if (node == NULL) {
node = new history_lru_node_t(item);
this->add_node(node);
this->insert(std::move(key), history_lru_item_t(item));
} else {
node->timestamp = std::max(node->timestamp, item.timestamp());
// What to do about paths here? Let's just ignore them.
@ -1248,8 +1247,9 @@ bool history_t::save_internal_via_rewrite() {
bool errored = false;
history_output_buffer_t buffer;
for (history_lru_cache_t::iterator iter = lru.begin(); iter != lru.end(); ++iter) {
const history_lru_node_t *node = *iter;
append_yaml_to_buffer(node->key, node->timestamp, node->required_paths, &buffer);
const wcstring &text = (*iter).first;
const history_lru_item_t &item = (*iter).second;
append_yaml_to_buffer(text, item.timestamp, item.required_paths, &buffer);
if (buffer.output_size() >= HISTORY_OUTPUT_BUFFER_SIZE &&
!buffer.flush_to_fd(out_fd)) {
errored = true;

258
src/lru.h
View file

@ -4,190 +4,204 @@
#include <assert.h>
#include <wchar.h>
#include <list>
#include <map>
#include <set>
#include "common.h"
/// A predicate to compare dereferenced pointers.
struct dereference_less_t {
template <typename ptr_t>
bool operator()(ptr_t p1, ptr_t p2) const {
return *p1 < *p2;
}
};
class lru_node_t {
template <class T>
friend class lru_cache_t;
/// Our linked list pointer.
lru_node_t *prev, *next;
public:
/// The key used to look up in the cache.
const wcstring key;
/// Constructor.
explicit lru_node_t(const wcstring &pkey) : prev(NULL), next(NULL), key(pkey) {}
/// Virtual destructor that does nothing for classes that inherit lru_node_t.
virtual ~lru_node_t() {}
/// operator< for std::set
bool operator<(const lru_node_t &other) const { return key < other.key; }
};
template <class node_type_t>
// Least-recently-used cache class
// This a map from wcstring to CONTENTS, that will evict entries when the count exceeds the maximum.
// It uses CRTP to inform clients when entries are evicted. This uses the classic LRU cache structure:
// a dictionary mapping keys to nodes, where the nodes also form a linked list. Our linked list is
// circular and has a sentinel node (the "mouth" - picture a snake swallowing its tail). This simplifies
// the logic: no pointer is ever NULL! It also works well with C++'s iterator since the sentinel node
// is a natural value for end(). Our nodes also have the unusual property of having a "back pointer":
// they store an iterator to the entry in the map containing the node. This allows us, given a node, to
// immediately locate the node and its key in the dictionary. This allows us to avoid duplicating the key
// in the node.
template <class DERIVED, class CONTENTS>
class lru_cache_t {
private:
/// Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is
/// used from background threads.
struct lru_node_t;
typedef typename std::map<wcstring, lru_node_t>::iterator node_iter_t;
struct lru_link_t {
// Our doubly linked list
// The base class is used for the mouth only
lru_link_t *prev = NULL;
lru_link_t *next = NULL;
};
// The node type in our LRU cache
struct lru_node_t : public lru_link_t {
// No copying
lru_node_t(const lru_node_t &) = delete;
lru_node_t &operator=(const lru_node_t &) = delete;
lru_node_t(lru_node_t &&) = default;
// Our location in the map!
node_iter_t iter;
// The value from the client
CONTENTS value;
explicit lru_node_t(CONTENTS v) :
value(std::move(v))
{}
};
// Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is
// used from background threads.
const size_t max_node_count;
/// Count of nodes.
size_t node_count;
// All of our nodes
// Note that our linked list contains pointers to these nodes in the map
// We are dependent on the iterator-noninvalidation guarantees of std::map
std::map<wcstring, lru_node_t> node_map;
/// The set of nodes.
typedef std::set<lru_node_t *, dereference_less_t> node_set_t;
node_set_t node_set;
// Head of the linked list
// The list is circular!
// If "empty" the mouth just points at itself.
lru_link_t mouth;
void promote_node(node_type_t *node) {
// We should never promote the mouth.
// Take a node and move it to the front of the list
void promote_node(lru_node_t *node) {
assert(node != &mouth);
// First unhook us.
// First unhook us
node->prev->next = node->next;
node->next->prev = node->prev;
// Put us after the mouth.
// Put us after the mouth
node->next = mouth.next;
node->next->prev = node;
node->prev = &mouth;
mouth.next = node;
}
void evict_node(node_type_t *condemned_node) {
// Remove the node
void evict_node(lru_node_t *node) {
assert(node != &mouth);
// We should never evict the mouth.
assert(condemned_node != NULL && condemned_node != &mouth);
assert(node != NULL && node->iter != this->node_map.end());
// Remove it from the linked list.
condemned_node->prev->next = condemned_node->next;
condemned_node->next->prev = condemned_node->prev;
node->prev->next = node->next;
node->next->prev = node->prev;
// Remove us from the set.
node_set.erase(condemned_node);
node_count--;
// Pull out our key and value
wcstring key = std::move(node->iter->first);
CONTENTS value(std::move(node->value));
// Tell ourselves.
this->node_was_evicted(condemned_node);
// Remove us from the map. This deallocates node!
node_map.erase(node->iter);
node = NULL;
// Tell ourselves what we did
DERIVED *dthis = static_cast<DERIVED *>(this);
dthis->entry_was_evicted(std::move(key), std::move(value));
}
void evict_last_node(void) { evict_node((node_type_t *)mouth.prev); }
// Evicts the last node
void evict_last_node() {
assert(mouth.prev != &mouth);
evict_node(static_cast<lru_node_t *>(mouth.prev));
}
static lru_node_t *get_previous(lru_node_t *node) { return node->prev; }
protected:
/// Head of the linked list.
lru_node_t mouth;
/// Overridable callback for when a node is evicted.
virtual void node_was_evicted(node_type_t *node) { UNUSED(node); }
// CRTP callback for when a node is evicted.
// Clients can implement this
void entry_was_evicted(wcstring key, CONTENTS value) {
USE(key);
USE(value);
}
public:
/// Constructor
explicit lru_cache_t(size_t max_size = 1024)
: max_node_count(max_size), node_count(0), mouth(wcstring()) {
// Hook up the mouth to itself: a one node circularly linked list!
mouth.prev = mouth.next = &mouth;
// Constructor
// Note our linked list is always circular!
explicit lru_cache_t(size_t max_size = 1024) : max_node_count(max_size) {
mouth.next = mouth.prev = &mouth;
}
/// Note that we do not evict nodes in our destructor (even though they typically need to be
/// deleted by their creator).
virtual ~lru_cache_t() {}
/// Returns the node for a given key, or NULL.
node_type_t *get_node(const wcstring &key) {
node_type_t *result = NULL;
// Construct a fake node as our key.
lru_node_t node_key(key);
// Look for it in the set.
node_set_t::iterator iter = node_set.find(&node_key);
// If we found a node, promote and return it.
if (iter != node_set.end()) {
result = static_cast<node_type_t *>(*iter);
promote_node(result);
// Returns the value for a given key, or NULL.
// This counts as a "use" and so promotes the node
CONTENTS *get(const wcstring &key) {
auto where = this->node_map.find(key);
if (where == this->node_map.end()) {
// not found
return NULL;
}
return result;
promote_node(&where->second);
return &where->second.value;
}
/// Evicts the node for a given key, returning true if a node was evicted.
// Evicts the node for a given key, returning true if a node was evicted.
bool evict_node(const wcstring &key) {
// Construct a fake node as our key.
lru_node_t node_key(key);
// Look for it in the set.
node_set_t::iterator iter = node_set.find(&node_key);
if (iter == node_set.end()) return false;
// Evict the given node.
evict_node(static_cast<node_type_t *>(*iter));
auto where = this->node_map.find(key);
if (where == this->node_map.end()) return false;
evict_node(&where->second);
return true;
}
/// Adds a node under the given key. Returns true if the node was added, false if the node was
/// not because a node with that key is already in the set.
bool add_node(node_type_t *node) {
// Add our node without eviction.
if (!this->add_node_without_eviction(node)) return false;
// Adds a node under the given key. Returns true if the node was added, false if the node was
// not because a node with that key is already in the set.
bool insert(wcstring key, CONTENTS value) {
if (! this->insert_no_eviction(std::move(key), std::move(value))) {
return false;
}
while (node_count > max_node_count) evict_last_node(); // evict
while (this->node_map.size() > max_node_count) {
evict_last_node();
}
return true;
}
/// Adds a node under the given key without triggering eviction. Returns true if the node was
/// added, false if the node was not because a node with that key is already in the set.
bool add_node_without_eviction(node_type_t *node) {
assert(node != NULL && node != &mouth);
// Adds a node under the given key without triggering eviction. Returns true if the node was
// added, false if the node was not because a node with that key is already in the set.
bool insert_no_eviction(wcstring key, CONTENTS value) {
// Try inserting; return false if it was already in the set.
if (!node_set.insert(node).second) return false;
auto iter_inserted = this->node_map.emplace(std::move(key), lru_node_t(std::move(value)));
if (! iter_inserted.second) {
// already present
return false;
}
// Tell the node where it is in the map
node_iter_t iter = iter_inserted.first;
lru_node_t *node = &iter->second;
node->iter = iter;
// Add the node after the mouth.
node->next = mouth.next;
node->next->prev = node;
node->prev = &mouth;
mouth.next = node;
// Update the count. This may push us over the maximum node count.
node_count++;
return true;
}
/// Counts nodes.
size_t size(void) { return node_count; }
// Number of entries
size_t size() { return this->node_map.size(); }
/// Evicts all nodes.
void evict_all_nodes(void) {
while (node_count > 0) {
while (this->size() > 0) {
evict_last_node();
}
}
/// Iterator for walking nodes, from least recently used to most.
// Iterator for walking nodes, from least recently used to most.
class iterator {
lru_node_t *node;
lru_link_t *node;
public:
explicit iterator(lru_node_t *val) : node(val) {}
void operator++() { node = lru_cache_t::get_previous(node); }
typedef std::pair<const wcstring &, const CONTENTS &> value_type;
explicit iterator(lru_link_t *val) : node(val) {}
void operator++() { node = node->prev; }
bool operator==(const iterator &other) { return node == other.node; }
bool operator!=(const iterator &other) { return !(*this == other); }
node_type_t *operator*() { return static_cast<node_type_t *>(node); }
value_type operator*() const {
const lru_node_t *dnode = static_cast<const lru_node_t *>(node);
const wcstring &key = dnode->iter->first;
return {key, dnode->value};
}
};
iterator begin() { return iterator(mouth.prev); }