From e52a04e341022d80db961902f754e68a83715abb Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 27 Jan 2017 12:18:16 -0800 Subject: [PATCH] Cleanup of LRU cache implementation Switch to CRTP from virtual functions and improve ownership semantics. It's no longer necessary for clients to use new and delete. --- src/autoload.cpp | 31 +++--- src/autoload.h | 21 ++-- src/fish_tests.cpp | 40 +++---- src/history.cpp | 28 ++--- src/lru.h | 258 ++++++++++++++++++++++++--------------------- 5 files changed, 191 insertions(+), 187 deletions(-) diff --git a/src/autoload.cpp b/src/autoload.cpp index 39cd3b515..c2423785b 100644 --- a/src/autoload.cpp +++ b/src/autoload.cpp @@ -57,13 +57,12 @@ autoload_t::autoload_t(const wcstring &env_var_name_var, const builtin_script_t autoload_t::~autoload_t() { pthread_mutex_destroy(&lock); } -void autoload_t::node_was_evicted(autoload_function_t *node) { +void autoload_t::entry_was_evicted(wcstring key, autoload_function_t node) { // This should only ever happen on the main thread. ASSERT_IS_MAIN_THREAD(); // Tell ourselves that the command was removed if it was loaded. - if (node->is_loaded) this->command_removed(node->key); - delete node; + if (node.is_loaded) this->command_removed(std::move(key)); } int autoload_t::unload(const wcstring &cmd) { return this->evict_node(cmd); } @@ -130,7 +129,7 @@ static bool script_name_precedes_script_name(const builtin_script_t &script1, /// Check whether the given command is loaded. bool autoload_t::has_tried_loading(const wcstring &cmd) { scoped_lock locker(lock); - autoload_function_t *func = this->get_node(cmd); + autoload_function_t *func = this->get(cmd); return func != NULL; } @@ -144,14 +143,16 @@ static bool is_stale(const autoload_function_t *func) { autoload_function_t *autoload_t::get_autoloaded_function_with_creation(const wcstring &cmd, bool allow_eviction) { ASSERT_IS_LOCKED(lock); - autoload_function_t *func = this->get_node(cmd); + autoload_function_t *func = this->get(cmd); if (!func) { - func = new autoload_function_t(cmd); + bool added; if (allow_eviction) { - this->add_node(func); + added = this->insert(cmd, autoload_function_t(false)); } else { - this->add_node_without_eviction(func); + added = this->insert_no_eviction(cmd, autoload_function_t(false)); } + func = this->get(cmd); + assert(func); } return func; } @@ -188,7 +189,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_ { bool allow_stale_functions = !reload; scoped_lock locker(lock); - autoload_function_t *func = this->get_node(cmd); // get the function + autoload_function_t *func = this->get(cmd); // get the function // If we can use this function, return whether we were able to access it. if (use_cached(func, really_load, allow_stale_functions)) { @@ -243,7 +244,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_ // Now we're actually going to take the lock. scoped_lock locker(lock); - autoload_function_t *func = this->get_node(cmd); + autoload_function_t *func = this->get(cmd); // Generate the source if we need to load it. bool need_to_load_function = @@ -286,15 +287,15 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_ if (!found_file && !has_script_source) { scoped_lock locker(lock); // Generate a placeholder. - autoload_function_t *func = this->get_node(cmd); + autoload_function_t *func = this->get(cmd); if (!func) { - func = new autoload_function_t(cmd); - func->is_placeholder = true; if (really_load) { - this->add_node(func); + this->insert(cmd, autoload_function_t(true)); } else { - this->add_node_without_eviction(func); + this->insert(cmd, autoload_function_t(true)); } + func = this->get(cmd); + assert(func); } func->access.last_checked = time(NULL); } diff --git a/src/autoload.h b/src/autoload.h index 49891828f..d874b4688 100644 --- a/src/autoload.h +++ b/src/autoload.h @@ -25,12 +25,11 @@ struct file_access_attempt_t { }; file_access_attempt_t access_file(const wcstring &path, int mode); -struct autoload_function_t : public lru_node_t { - explicit autoload_function_t(const wcstring &key) - : lru_node_t(key), - access(), +struct autoload_function_t { + explicit autoload_function_t(bool placeholder) + : access(), is_loaded(false), - is_placeholder(false), + is_placeholder(placeholder), is_internalized(false) {} /// The last access attempt recorded @@ -52,7 +51,7 @@ struct builtin_script_t { class env_vars_snapshot_t; /// Class representing a path from which we can autoload and the autoloaded contents. -class autoload_t : private lru_cache_t { +class autoload_t : public lru_cache_t { private: /// Lock for thread safety. pthread_mutex_t lock; @@ -70,13 +69,11 @@ class autoload_t : private lru_cache_t { /// This is here to help prevent recursion. std::set is_loading_set; - void remove_all_functions(void) { this->evict_all_nodes(); } + void remove_all_functions() { this->evict_all_nodes(); } bool locate_file_and_maybe_load_it(const wcstring &cmd, bool really_load, bool reload, const wcstring_list_t &path_list); - virtual void node_was_evicted(autoload_function_t *node); - autoload_function_t *get_autoloaded_function_with_creation(const wcstring &cmd, bool allow_eviction); @@ -85,7 +82,11 @@ class autoload_t : private lru_cache_t { virtual void command_removed(const wcstring &cmd) { UNUSED(cmd); } public: - /// Create an autoload_t for the given environment variable name. + + // CRTP override + void entry_was_evicted(wcstring key, autoload_function_t node); + + // Create an autoload_t for the given environment variable name. autoload_t(const wcstring &env_var_name_var, const builtin_script_t *scripts, size_t script_count); diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 3f01c28a3..f162c9d07 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -1122,20 +1122,14 @@ static void test_escape_sequences(void) { err(L"test_escape_sequences failed on line %d\n", __LINE__); } -class lru_node_test_t : public lru_node_t { +class test_lru_t : public lru_cache_t { public: - explicit lru_node_test_t(const wcstring &tmp) : lru_node_t(tmp) {} -}; + test_lru_t() : lru_cache_t(16) {} -class test_lru_t : public lru_cache_t { - public: - test_lru_t() : lru_cache_t(16) {} + std::vector> evicted; - std::vector evicted_nodes; - - virtual void node_was_evicted(lru_node_test_t *node) { - do_test(find(evicted_nodes.begin(), evicted_nodes.end(), node) == evicted_nodes.end()); - evicted_nodes.push_back(node); + void entry_was_evicted(wcstring key, int val) { + evicted.push_back({key, val}); } }; @@ -1143,24 +1137,18 @@ static void test_lru(void) { say(L"Testing LRU cache"); test_lru_t cache; - std::vector expected_evicted; - size_t total_nodes = 20; - for (size_t i = 0; i < total_nodes; i++) { - do_test(cache.size() == std::min(i, (size_t)16)); - lru_node_test_t *node = new lru_node_test_t(to_string(i)); - if (i < 4) expected_evicted.push_back(node); + std::vector> expected_evicted; + int total_nodes = 20; + for (int i = 0; i < total_nodes; i++) { + do_test(cache.size() == size_t(std::min(i, 16))); + if (i < 4) expected_evicted.push_back({to_string(i), i}); // Adding the node the first time should work, and subsequent times should fail. - do_test(cache.add_node(node)); - do_test(!cache.add_node(node)); + do_test(cache.insert(to_string(i), i)); + do_test(!cache.insert(to_string(i), i+1)); } - do_test(cache.evicted_nodes == expected_evicted); + do_test(cache.evicted == expected_evicted); cache.evict_all_nodes(); - do_test(cache.evicted_nodes.size() == total_nodes); - while (!cache.evicted_nodes.empty()) { - lru_node_t *node = cache.evicted_nodes.back(); - cache.evicted_nodes.pop_back(); - delete node; - } + do_test(cache.evicted.size() == size_t(total_nodes)); } /// Perform parameter expansion and test if the output equals the zero-terminated parameter list diff --git a/src/history.cpp b/src/history.cpp index d871c9982..5b80d0676 100644 --- a/src/history.cpp +++ b/src/history.cpp @@ -142,23 +142,22 @@ static bool history_file_lock(int fd, int lock_type) { /// Our LRU cache is used for restricting the amount of history we have, and limiting how long we /// order it. -class history_lru_node_t : public lru_node_t { +class history_lru_item_t { public: + wcstring text; time_t timestamp; path_list_t required_paths; - explicit history_lru_node_t(const history_item_t &item) - : lru_node_t(item.str()), + explicit history_lru_item_t(const history_item_t &item) + : text(item.str()), timestamp(item.timestamp()), required_paths(item.get_required_paths()) {} }; -class history_lru_cache_t : public lru_cache_t { - protected: - /// Override to delete evicted nodes. - virtual void node_was_evicted(history_lru_node_t *node) { delete node; } - +class history_lru_cache_t : public lru_cache_t { + typedef lru_cache_t super; public: - explicit history_lru_cache_t(size_t max) : lru_cache_t(max) {} + + using super::super; /// Function to add a history item. void add_item(const history_item_t &item) { @@ -167,10 +166,10 @@ class history_lru_cache_t : public lru_cache_t { // See if it's in the cache. If it is, update the timestamp. If not, we create a new node // and add it. Note that calling get_node promotes the node to the front. - history_lru_node_t *node = this->get_node(item.str()); + wcstring key = item.str(); + history_lru_item_t *node = this->get(key); if (node == NULL) { - node = new history_lru_node_t(item); - this->add_node(node); + this->insert(std::move(key), history_lru_item_t(item)); } else { node->timestamp = std::max(node->timestamp, item.timestamp()); // What to do about paths here? Let's just ignore them. @@ -1248,8 +1247,9 @@ bool history_t::save_internal_via_rewrite() { bool errored = false; history_output_buffer_t buffer; for (history_lru_cache_t::iterator iter = lru.begin(); iter != lru.end(); ++iter) { - const history_lru_node_t *node = *iter; - append_yaml_to_buffer(node->key, node->timestamp, node->required_paths, &buffer); + const wcstring &text = (*iter).first; + const history_lru_item_t &item = (*iter).second; + append_yaml_to_buffer(text, item.timestamp, item.required_paths, &buffer); if (buffer.output_size() >= HISTORY_OUTPUT_BUFFER_SIZE && !buffer.flush_to_fd(out_fd)) { errored = true; diff --git a/src/lru.h b/src/lru.h index 4a7c2dcff..3b9b4d148 100644 --- a/src/lru.h +++ b/src/lru.h @@ -4,190 +4,204 @@ #include #include -#include #include -#include #include "common.h" -/// A predicate to compare dereferenced pointers. -struct dereference_less_t { - template - bool operator()(ptr_t p1, ptr_t p2) const { - return *p1 < *p2; - } -}; - -class lru_node_t { - template - friend class lru_cache_t; - - /// Our linked list pointer. - lru_node_t *prev, *next; - - public: - /// The key used to look up in the cache. - const wcstring key; - - /// Constructor. - explicit lru_node_t(const wcstring &pkey) : prev(NULL), next(NULL), key(pkey) {} - - /// Virtual destructor that does nothing for classes that inherit lru_node_t. - virtual ~lru_node_t() {} - - /// operator< for std::set - bool operator<(const lru_node_t &other) const { return key < other.key; } -}; - -template +// Least-recently-used cache class +// This a map from wcstring to CONTENTS, that will evict entries when the count exceeds the maximum. +// It uses CRTP to inform clients when entries are evicted. This uses the classic LRU cache structure: +// a dictionary mapping keys to nodes, where the nodes also form a linked list. Our linked list is +// circular and has a sentinel node (the "mouth" - picture a snake swallowing its tail). This simplifies +// the logic: no pointer is ever NULL! It also works well with C++'s iterator since the sentinel node +// is a natural value for end(). Our nodes also have the unusual property of having a "back pointer": +// they store an iterator to the entry in the map containing the node. This allows us, given a node, to +// immediately locate the node and its key in the dictionary. This allows us to avoid duplicating the key +// in the node. +template class lru_cache_t { - private: - /// Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is - /// used from background threads. + + struct lru_node_t; + typedef typename std::map::iterator node_iter_t; + + struct lru_link_t { + // Our doubly linked list + // The base class is used for the mouth only + lru_link_t *prev = NULL; + lru_link_t *next = NULL; + }; + + // The node type in our LRU cache + struct lru_node_t : public lru_link_t { + // No copying + lru_node_t(const lru_node_t &) = delete; + lru_node_t &operator=(const lru_node_t &) = delete; + lru_node_t(lru_node_t &&) = default; + + // Our location in the map! + node_iter_t iter; + + // The value from the client + CONTENTS value; + + explicit lru_node_t(CONTENTS v) : + value(std::move(v)) + {} + }; + + + // Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is + // used from background threads. const size_t max_node_count; - /// Count of nodes. - size_t node_count; + // All of our nodes + // Note that our linked list contains pointers to these nodes in the map + // We are dependent on the iterator-noninvalidation guarantees of std::map + std::map node_map; - /// The set of nodes. - typedef std::set node_set_t; - node_set_t node_set; + // Head of the linked list + // The list is circular! + // If "empty" the mouth just points at itself. + lru_link_t mouth; - void promote_node(node_type_t *node) { - // We should never promote the mouth. + // Take a node and move it to the front of the list + void promote_node(lru_node_t *node) { assert(node != &mouth); - // First unhook us. + // First unhook us node->prev->next = node->next; node->next->prev = node->prev; - // Put us after the mouth. + // Put us after the mouth node->next = mouth.next; node->next->prev = node; node->prev = &mouth; mouth.next = node; } - void evict_node(node_type_t *condemned_node) { + // Remove the node + void evict_node(lru_node_t *node) { + assert(node != &mouth); + // We should never evict the mouth. - assert(condemned_node != NULL && condemned_node != &mouth); + assert(node != NULL && node->iter != this->node_map.end()); // Remove it from the linked list. - condemned_node->prev->next = condemned_node->next; - condemned_node->next->prev = condemned_node->prev; + node->prev->next = node->next; + node->next->prev = node->prev; - // Remove us from the set. - node_set.erase(condemned_node); - node_count--; + // Pull out our key and value + wcstring key = std::move(node->iter->first); + CONTENTS value(std::move(node->value)); - // Tell ourselves. - this->node_was_evicted(condemned_node); + // Remove us from the map. This deallocates node! + node_map.erase(node->iter); + node = NULL; + + // Tell ourselves what we did + DERIVED *dthis = static_cast(this); + dthis->entry_was_evicted(std::move(key), std::move(value)); } - void evict_last_node(void) { evict_node((node_type_t *)mouth.prev); } + // Evicts the last node + void evict_last_node() { + assert(mouth.prev != &mouth); + evict_node(static_cast(mouth.prev)); + } - static lru_node_t *get_previous(lru_node_t *node) { return node->prev; } - - protected: - /// Head of the linked list. - lru_node_t mouth; - - /// Overridable callback for when a node is evicted. - virtual void node_was_evicted(node_type_t *node) { UNUSED(node); } + // CRTP callback for when a node is evicted. + // Clients can implement this + void entry_was_evicted(wcstring key, CONTENTS value) { + USE(key); + USE(value); + } public: - /// Constructor - explicit lru_cache_t(size_t max_size = 1024) - : max_node_count(max_size), node_count(0), mouth(wcstring()) { - // Hook up the mouth to itself: a one node circularly linked list! - mouth.prev = mouth.next = &mouth; + // Constructor + // Note our linked list is always circular! + explicit lru_cache_t(size_t max_size = 1024) : max_node_count(max_size) { + mouth.next = mouth.prev = &mouth; } - /// Note that we do not evict nodes in our destructor (even though they typically need to be - /// deleted by their creator). - virtual ~lru_cache_t() {} - - /// Returns the node for a given key, or NULL. - node_type_t *get_node(const wcstring &key) { - node_type_t *result = NULL; - - // Construct a fake node as our key. - lru_node_t node_key(key); - - // Look for it in the set. - node_set_t::iterator iter = node_set.find(&node_key); - - // If we found a node, promote and return it. - if (iter != node_set.end()) { - result = static_cast(*iter); - promote_node(result); + // Returns the value for a given key, or NULL. + // This counts as a "use" and so promotes the node + CONTENTS *get(const wcstring &key) { + auto where = this->node_map.find(key); + if (where == this->node_map.end()) { + // not found + return NULL; } - return result; + promote_node(&where->second); + return &where->second.value; } - /// Evicts the node for a given key, returning true if a node was evicted. + // Evicts the node for a given key, returning true if a node was evicted. bool evict_node(const wcstring &key) { - // Construct a fake node as our key. - lru_node_t node_key(key); - - // Look for it in the set. - node_set_t::iterator iter = node_set.find(&node_key); - if (iter == node_set.end()) return false; - - // Evict the given node. - evict_node(static_cast(*iter)); + auto where = this->node_map.find(key); + if (where == this->node_map.end()) return false; + evict_node(&where->second); return true; } - /// Adds a node under the given key. Returns true if the node was added, false if the node was - /// not because a node with that key is already in the set. - bool add_node(node_type_t *node) { - // Add our node without eviction. - if (!this->add_node_without_eviction(node)) return false; + // Adds a node under the given key. Returns true if the node was added, false if the node was + // not because a node with that key is already in the set. + bool insert(wcstring key, CONTENTS value) { + if (! this->insert_no_eviction(std::move(key), std::move(value))) { + return false; + } - while (node_count > max_node_count) evict_last_node(); // evict + while (this->node_map.size() > max_node_count) { + evict_last_node(); + } return true; } - /// Adds a node under the given key without triggering eviction. Returns true if the node was - /// added, false if the node was not because a node with that key is already in the set. - bool add_node_without_eviction(node_type_t *node) { - assert(node != NULL && node != &mouth); - + // Adds a node under the given key without triggering eviction. Returns true if the node was + // added, false if the node was not because a node with that key is already in the set. + bool insert_no_eviction(wcstring key, CONTENTS value) { // Try inserting; return false if it was already in the set. - if (!node_set.insert(node).second) return false; + auto iter_inserted = this->node_map.emplace(std::move(key), lru_node_t(std::move(value))); + if (! iter_inserted.second) { + // already present + return false; + } + + // Tell the node where it is in the map + node_iter_t iter = iter_inserted.first; + lru_node_t *node = &iter->second; + node->iter = iter; - // Add the node after the mouth. node->next = mouth.next; node->next->prev = node; node->prev = &mouth; mouth.next = node; - - // Update the count. This may push us over the maximum node count. - node_count++; return true; } - /// Counts nodes. - size_t size(void) { return node_count; } + // Number of entries + size_t size() { return this->node_map.size(); } - /// Evicts all nodes. void evict_all_nodes(void) { - while (node_count > 0) { + while (this->size() > 0) { evict_last_node(); } } - /// Iterator for walking nodes, from least recently used to most. + // Iterator for walking nodes, from least recently used to most. class iterator { - lru_node_t *node; - + lru_link_t *node; public: - explicit iterator(lru_node_t *val) : node(val) {} - void operator++() { node = lru_cache_t::get_previous(node); } + typedef std::pair value_type; + + explicit iterator(lru_link_t *val) : node(val) {} + void operator++() { node = node->prev; } bool operator==(const iterator &other) { return node == other.node; } bool operator!=(const iterator &other) { return !(*this == other); } - node_type_t *operator*() { return static_cast(node); } + value_type operator*() const { + const lru_node_t *dnode = static_cast(node); + const wcstring &key = dnode->iter->first; + return {key, dnode->value}; + } }; iterator begin() { return iterator(mouth.prev); }