Cleanup of LRU cache implementation

Switch to CRTP from virtual functions and improve ownership semantics. It's no longer necessary for clients to use new and delete.
2024-12-26 04:43:10 +00:00 · 2017-01-27 12:18:16 -08:00 · 2017-01-27 12:18:16 -08:00 · e52a04e341
commit e52a04e341
parent 61f272495d
5 changed files with 191 additions and 187 deletions
--- a/src/autoload.cpp
+++ b/src/autoload.cpp
@ -57,13 +57,12 @@ autoload_t::autoload_t(const wcstring &env_var_name_var, const builtin_script_t

 autoload_t::~autoload_t() { pthread_mutex_destroy(&lock); }

-void autoload_t::node_was_evicted(autoload_function_t *node) {
+void autoload_t::entry_was_evicted(wcstring key, autoload_function_t node) {
    // This should only ever happen on the main thread.
    ASSERT_IS_MAIN_THREAD();

    // Tell ourselves that the command was removed if it was loaded.
-    if (node->is_loaded) this->command_removed(node->key);
-    delete node;
+    if (node.is_loaded) this->command_removed(std::move(key));
 }

 int autoload_t::unload(const wcstring &cmd) { return this->evict_node(cmd); }
@ -130,7 +129,7 @@ static bool script_name_precedes_script_name(const builtin_script_t &script1,
 /// Check whether the given command is loaded.
 bool autoload_t::has_tried_loading(const wcstring &cmd) {
    scoped_lock locker(lock);
-    autoload_function_t *func = this->get_node(cmd);
+    autoload_function_t *func = this->get(cmd);
    return func != NULL;
 }

@ -144,14 +143,16 @@ static bool is_stale(const autoload_function_t *func) {
 autoload_function_t *autoload_t::get_autoloaded_function_with_creation(const wcstring &cmd,
                                                                       bool allow_eviction) {
    ASSERT_IS_LOCKED(lock);
-    autoload_function_t *func = this->get_node(cmd);
+    autoload_function_t *func = this->get(cmd);
    if (!func) {
-        func = new autoload_function_t(cmd);
+        bool added;
        if (allow_eviction) {
-            this->add_node(func);
+            added = this->insert(cmd, autoload_function_t(false));
        } else {
-            this->add_node_without_eviction(func);
+            added = this->insert_no_eviction(cmd, autoload_function_t(false));
        }
+        func = this->get(cmd);
+        assert(func);
    }
    return func;
 }
@ -188,7 +189,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_
    {
        bool allow_stale_functions = !reload;
        scoped_lock locker(lock);
-        autoload_function_t *func = this->get_node(cmd);  // get the function
+        autoload_function_t *func = this->get(cmd);  // get the function

        // If we can use this function, return whether we were able to access it.
        if (use_cached(func, really_load, allow_stale_functions)) {
@ -243,7 +244,7 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_

            // Now we're actually going to take the lock.
            scoped_lock locker(lock);
-            autoload_function_t *func = this->get_node(cmd);
+            autoload_function_t *func = this->get(cmd);

            // Generate the source if we need to load it.
            bool need_to_load_function =
@ -286,15 +287,15 @@ bool autoload_t::locate_file_and_maybe_load_it(const wcstring &cmd, bool really_
        if (!found_file && !has_script_source) {
            scoped_lock locker(lock);
            // Generate a placeholder.
-            autoload_function_t *func = this->get_node(cmd);
+            autoload_function_t *func = this->get(cmd);
            if (!func) {
-                func = new autoload_function_t(cmd);
-                func->is_placeholder = true;
                if (really_load) {
-                    this->add_node(func);
+                    this->insert(cmd, autoload_function_t(true));
                } else {
-                    this->add_node_without_eviction(func);
+                    this->insert(cmd, autoload_function_t(true));
                }
+                func = this->get(cmd);
+                assert(func);
            }
            func->access.last_checked = time(NULL);
        }
--- a/src/autoload.h
+++ b/src/autoload.h
@ -25,12 +25,11 @@ struct file_access_attempt_t {
 };
 file_access_attempt_t access_file(const wcstring &path, int mode);

-struct autoload_function_t : public lru_node_t {
-    explicit autoload_function_t(const wcstring &key)
-        : lru_node_t(key),
-          access(),
+struct autoload_function_t {
+    explicit autoload_function_t(bool placeholder)
+        : access(),
          is_loaded(false),
-          is_placeholder(false),
+          is_placeholder(placeholder),
          is_internalized(false) {}

    /// The last access attempt recorded
@ -52,7 +51,7 @@ struct builtin_script_t {
 class env_vars_snapshot_t;

 /// Class representing a path from which we can autoload and the autoloaded contents.
-class autoload_t : private lru_cache_t<autoload_function_t> {
+class autoload_t : public lru_cache_t<autoload_t, autoload_function_t> {
   private:
    /// Lock for thread safety.
    pthread_mutex_t lock;
@ -70,13 +69,11 @@ class autoload_t : private lru_cache_t<autoload_function_t> {
    /// This is here to help prevent recursion.
    std::set<wcstring> is_loading_set;

-    void remove_all_functions(void) { this->evict_all_nodes(); }
+    void remove_all_functions() { this->evict_all_nodes(); }

    bool locate_file_and_maybe_load_it(const wcstring &cmd, bool really_load, bool reload,
                                       const wcstring_list_t &path_list);

-    virtual void node_was_evicted(autoload_function_t *node);
-
    autoload_function_t *get_autoloaded_function_with_creation(const wcstring &cmd,
                                                               bool allow_eviction);

@ -85,7 +82,11 @@ class autoload_t : private lru_cache_t<autoload_function_t> {
    virtual void command_removed(const wcstring &cmd) { UNUSED(cmd); }

   public:
-    /// Create an autoload_t for the given environment variable name.
+
+    // CRTP override
+    void entry_was_evicted(wcstring key, autoload_function_t node);
+
+    // Create an autoload_t for the given environment variable name.
    autoload_t(const wcstring &env_var_name_var, const builtin_script_t *scripts,
               size_t script_count);

--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@ -1122,20 +1122,14 @@ static void test_escape_sequences(void) {
        err(L"test_escape_sequences failed on line %d\n", __LINE__);
 }

-class lru_node_test_t : public lru_node_t {
+class test_lru_t : public lru_cache_t<test_lru_t, int> {
   public:
-    explicit lru_node_test_t(const wcstring &tmp) : lru_node_t(tmp) {}
-};
+    test_lru_t() : lru_cache_t<test_lru_t, int>(16) {}

-class test_lru_t : public lru_cache_t<lru_node_test_t> {
-   public:
-    test_lru_t() : lru_cache_t<lru_node_test_t>(16) {}
+    std::vector<std::pair<wcstring, int>> evicted;

-    std::vector<lru_node_test_t *> evicted_nodes;
-
-    virtual void node_was_evicted(lru_node_test_t *node) {
-        do_test(find(evicted_nodes.begin(), evicted_nodes.end(), node) == evicted_nodes.end());
-        evicted_nodes.push_back(node);
+    void entry_was_evicted(wcstring key, int val) {
+        evicted.push_back({key, val});
    }
 };

@ -1143,24 +1137,18 @@ static void test_lru(void) {
    say(L"Testing LRU cache");

    test_lru_t cache;
-    std::vector<lru_node_test_t *> expected_evicted;
-    size_t total_nodes = 20;
-    for (size_t i = 0; i < total_nodes; i++) {
-        do_test(cache.size() == std::min(i, (size_t)16));
-        lru_node_test_t *node = new lru_node_test_t(to_string(i));
-        if (i < 4) expected_evicted.push_back(node);
+    std::vector<std::pair<wcstring, int>> expected_evicted;
+    int total_nodes = 20;
+    for (int i = 0; i < total_nodes; i++) {
+        do_test(cache.size() == size_t(std::min(i, 16)));
+        if (i < 4) expected_evicted.push_back({to_string(i), i});
        // Adding the node the first time should work, and subsequent times should fail.
-        do_test(cache.add_node(node));
-        do_test(!cache.add_node(node));
+        do_test(cache.insert(to_string(i), i));
+        do_test(!cache.insert(to_string(i), i+1));
    }
-    do_test(cache.evicted_nodes == expected_evicted);
+    do_test(cache.evicted == expected_evicted);
    cache.evict_all_nodes();
-    do_test(cache.evicted_nodes.size() == total_nodes);
-    while (!cache.evicted_nodes.empty()) {
-        lru_node_t *node = cache.evicted_nodes.back();
-        cache.evicted_nodes.pop_back();
-        delete node;
-    }
+    do_test(cache.evicted.size() == size_t(total_nodes));
 }

 /// Perform parameter expansion and test if the output equals the zero-terminated parameter list
--- a/src/history.cpp
+++ b/src/history.cpp
@ -142,23 +142,22 @@ static bool history_file_lock(int fd, int lock_type) {

 /// Our LRU cache is used for restricting the amount of history we have, and limiting how long we
 /// order it.
-class history_lru_node_t : public lru_node_t {
+class history_lru_item_t {
   public:
+    wcstring text;
    time_t timestamp;
    path_list_t required_paths;
-    explicit history_lru_node_t(const history_item_t &item)
-        : lru_node_t(item.str()),
+    explicit history_lru_item_t(const history_item_t &item)
+        : text(item.str()),
          timestamp(item.timestamp()),
          required_paths(item.get_required_paths()) {}
 };

-class history_lru_cache_t : public lru_cache_t<history_lru_node_t> {
-   protected:
-    /// Override to delete evicted nodes.
-    virtual void node_was_evicted(history_lru_node_t *node) { delete node; }
-
+class history_lru_cache_t : public lru_cache_t<history_lru_cache_t, history_lru_item_t> {
+    typedef lru_cache_t<history_lru_cache_t, history_lru_item_t> super;
   public:
-    explicit history_lru_cache_t(size_t max) : lru_cache_t<history_lru_node_t>(max) {}
+
+    using super::super;

    /// Function to add a history item.
    void add_item(const history_item_t &item) {
@ -167,10 +166,10 @@ class history_lru_cache_t : public lru_cache_t<history_lru_node_t> {

        // See if it's in the cache. If it is, update the timestamp. If not, we create a new node
        // and add it. Note that calling get_node promotes the node to the front.
-        history_lru_node_t *node = this->get_node(item.str());
+        wcstring key = item.str();
+        history_lru_item_t *node = this->get(key);
        if (node == NULL) {
-            node = new history_lru_node_t(item);
-            this->add_node(node);
+            this->insert(std::move(key), history_lru_item_t(item));
        } else {
            node->timestamp = std::max(node->timestamp, item.timestamp());
            // What to do about paths here? Let's just ignore them.
@ -1248,8 +1247,9 @@ bool history_t::save_internal_via_rewrite() {
            bool errored = false;
            history_output_buffer_t buffer;
            for (history_lru_cache_t::iterator iter = lru.begin(); iter != lru.end(); ++iter) {
-                const history_lru_node_t *node = *iter;
-                append_yaml_to_buffer(node->key, node->timestamp, node->required_paths, &buffer);
+                const wcstring &text = (*iter).first;
+                const history_lru_item_t &item = (*iter).second;
+                append_yaml_to_buffer(text, item.timestamp, item.required_paths, &buffer);
                if (buffer.output_size() >= HISTORY_OUTPUT_BUFFER_SIZE &&
                    !buffer.flush_to_fd(out_fd)) {
                    errored = true;
--- a/src/lru.h
+++ b/src/lru.h
@ -4,190 +4,204 @@

 #include <assert.h>
 #include <wchar.h>
-#include <list>
 #include <map>
-#include <set>

 #include "common.h"

-/// A predicate to compare dereferenced pointers.
-struct dereference_less_t {
-    template <typename ptr_t>
-    bool operator()(ptr_t p1, ptr_t p2) const {
-        return *p1 < *p2;
-    }
-};
-
-class lru_node_t {
-    template <class T>
-    friend class lru_cache_t;
-
-    /// Our linked list pointer.
-    lru_node_t *prev, *next;
-
-   public:
-    /// The key used to look up in the cache.
-    const wcstring key;
-
-    /// Constructor.
-    explicit lru_node_t(const wcstring &pkey) : prev(NULL), next(NULL), key(pkey) {}
-
-    /// Virtual destructor that does nothing for classes that inherit lru_node_t.
-    virtual ~lru_node_t() {}
-
-    /// operator< for std::set
-    bool operator<(const lru_node_t &other) const { return key < other.key; }
-};
-
-template <class node_type_t>
+// Least-recently-used cache class
+// This a map from wcstring to CONTENTS, that will evict entries when the count exceeds the maximum.
+// It uses CRTP to inform clients when entries are evicted. This uses the classic LRU cache structure:
+// a dictionary mapping keys to nodes, where the nodes also form a linked list. Our linked list is
+// circular and has a sentinel node (the "mouth" - picture a snake swallowing its tail). This simplifies
+// the logic: no pointer is ever NULL! It also works well with C++'s iterator since the sentinel node
+// is a natural value for end(). Our nodes also have the unusual property of having a "back pointer":
+// they store an iterator to the entry in the map containing the node. This allows us, given a node, to
+// immediately locate the node and its key in the dictionary. This allows us to avoid duplicating the key
+// in the node.
+template <class DERIVED, class CONTENTS>
 class lru_cache_t {
-   private:
-    /// Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is
-    /// used from background threads.
+
+    struct lru_node_t;
+    typedef typename std::map<wcstring, lru_node_t>::iterator node_iter_t;
+
+    struct lru_link_t {
+        // Our doubly linked list
+        // The base class is used for the mouth only
+        lru_link_t *prev = NULL;
+        lru_link_t *next = NULL;
+    };
+
+    // The node type in our LRU cache
+    struct lru_node_t : public lru_link_t {
+        // No copying
+        lru_node_t(const lru_node_t &) = delete;
+        lru_node_t &operator=(const lru_node_t &) = delete;
+        lru_node_t(lru_node_t &&) = default;
+
+        // Our location in the map!
+        node_iter_t iter;
+
+        // The value from the client
+        CONTENTS value;
+
+        explicit lru_node_t(CONTENTS v) :
+            value(std::move(v))
+        {}
+    };
+
+
+    // Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is
+    // used from background threads.
    const size_t max_node_count;

-    /// Count of nodes.
-    size_t node_count;
+    // All of our nodes
+    // Note that our linked list contains pointers to these nodes in the map
+    // We are dependent on the iterator-noninvalidation guarantees of std::map
+    std::map<wcstring, lru_node_t> node_map;

-    /// The set of nodes.
-    typedef std::set<lru_node_t *, dereference_less_t> node_set_t;
-    node_set_t node_set;
+    // Head of the linked list
+    // The list is circular!
+    // If "empty" the mouth just points at itself.
+    lru_link_t mouth;

-    void promote_node(node_type_t *node) {
-        // We should never promote the mouth.
+    // Take a node and move it to the front of the list
+    void promote_node(lru_node_t *node) {
        assert(node != &mouth);

-        // First unhook us.
+        // First unhook us
        node->prev->next = node->next;
        node->next->prev = node->prev;

-        // Put us after the mouth.
+        // Put us after the mouth
        node->next = mouth.next;
        node->next->prev = node;
        node->prev = &mouth;
        mouth.next = node;
    }

-    void evict_node(node_type_t *condemned_node) {
+    // Remove the node
+    void evict_node(lru_node_t *node) {
+        assert(node != &mouth);
+
        // We should never evict the mouth.
-        assert(condemned_node != NULL && condemned_node != &mouth);
+        assert(node != NULL && node->iter != this->node_map.end());

        // Remove it from the linked list.
-        condemned_node->prev->next = condemned_node->next;
-        condemned_node->next->prev = condemned_node->prev;
+        node->prev->next = node->next;
+        node->next->prev = node->prev;

-        // Remove us from the set.
-        node_set.erase(condemned_node);
-        node_count--;
+        // Pull out our key and value
+        wcstring key = std::move(node->iter->first);
+        CONTENTS value(std::move(node->value));

-        // Tell ourselves.
-        this->node_was_evicted(condemned_node);
+        // Remove us from the map. This deallocates node!
+        node_map.erase(node->iter);
+        node = NULL;
+
+        // Tell ourselves what we did
+        DERIVED *dthis = static_cast<DERIVED *>(this);
+        dthis->entry_was_evicted(std::move(key), std::move(value));
    }

-    void evict_last_node(void) { evict_node((node_type_t *)mouth.prev); }
+    // Evicts the last node
+    void evict_last_node() {
+        assert(mouth.prev != &mouth);
+        evict_node(static_cast<lru_node_t *>(mouth.prev));
+    }

-    static lru_node_t *get_previous(lru_node_t *node) { return node->prev; }
-
-   protected:
-    /// Head of the linked list.
-    lru_node_t mouth;
-
-    /// Overridable callback for when a node is evicted.
-    virtual void node_was_evicted(node_type_t *node) { UNUSED(node); }
+    // CRTP callback for when a node is evicted.
+    // Clients can implement this
+    void entry_was_evicted(wcstring key, CONTENTS value) {
+        USE(key);
+        USE(value);
+    }

   public:
-    /// Constructor
-    explicit lru_cache_t(size_t max_size = 1024)
-        : max_node_count(max_size), node_count(0), mouth(wcstring()) {
-        // Hook up the mouth to itself: a one node circularly linked list!
-        mouth.prev = mouth.next = &mouth;
+    // Constructor
+    // Note our linked list is always circular!
+    explicit lru_cache_t(size_t max_size = 1024) : max_node_count(max_size) {
+        mouth.next = mouth.prev = &mouth;
    }

-    /// Note that we do not evict nodes in our destructor (even though they typically need to be
-    /// deleted by their creator).
-    virtual ~lru_cache_t() {}
-
-    /// Returns the node for a given key, or NULL.
-    node_type_t *get_node(const wcstring &key) {
-        node_type_t *result = NULL;
-
-        // Construct a fake node as our key.
-        lru_node_t node_key(key);
-
-        // Look for it in the set.
-        node_set_t::iterator iter = node_set.find(&node_key);
-
-        // If we found a node, promote and return it.
-        if (iter != node_set.end()) {
-            result = static_cast<node_type_t *>(*iter);
-            promote_node(result);
+    // Returns the value for a given key, or NULL.
+    // This counts as a "use" and so promotes the node
+    CONTENTS *get(const wcstring &key) {
+        auto where = this->node_map.find(key);
+        if (where == this->node_map.end()) {
+            // not found
+            return NULL;
        }
-        return result;
+        promote_node(&where->second);
+        return &where->second.value;
    }

-    /// Evicts the node for a given key, returning true if a node was evicted.
+    // Evicts the node for a given key, returning true if a node was evicted.
    bool evict_node(const wcstring &key) {
-        // Construct a fake node as our key.
-        lru_node_t node_key(key);
-
-        // Look for it in the set.
-        node_set_t::iterator iter = node_set.find(&node_key);
-        if (iter == node_set.end()) return false;
-
-        // Evict the given node.
-        evict_node(static_cast<node_type_t *>(*iter));
+        auto where = this->node_map.find(key);
+        if (where == this->node_map.end()) return false;
+        evict_node(&where->second);
        return true;
    }

-    /// Adds a node under the given key. Returns true if the node was added, false if the node was
-    /// not because a node with that key is already in the set.
-    bool add_node(node_type_t *node) {
-        // Add our node without eviction.
-        if (!this->add_node_without_eviction(node)) return false;
+    // Adds a node under the given key. Returns true if the node was added, false if the node was
+    // not because a node with that key is already in the set.
+    bool insert(wcstring key, CONTENTS value) {
+        if (! this->insert_no_eviction(std::move(key), std::move(value))) {
+            return false;
+        }

-        while (node_count > max_node_count) evict_last_node();  // evict
+        while (this->node_map.size() > max_node_count) {
+            evict_last_node();
+        }
        return true;
    }

-    /// Adds a node under the given key without triggering eviction. Returns true if the node was
-    /// added, false if the node was not because a node with that key is already in the set.
-    bool add_node_without_eviction(node_type_t *node) {
-        assert(node != NULL && node != &mouth);
-
+    // Adds a node under the given key without triggering eviction. Returns true if the node was
+    // added, false if the node was not because a node with that key is already in the set.
+    bool insert_no_eviction(wcstring key, CONTENTS value) {
        // Try inserting; return false if it was already in the set.
-        if (!node_set.insert(node).second) return false;
+        auto iter_inserted = this->node_map.emplace(std::move(key), lru_node_t(std::move(value)));
+        if (! iter_inserted.second) {
+            // already present
+            return false;
+        }
+
+        // Tell the node where it is in the map
+        node_iter_t iter = iter_inserted.first;
+        lru_node_t *node = &iter->second;
+        node->iter = iter;

-        // Add the node after the mouth.
        node->next = mouth.next;
        node->next->prev = node;
        node->prev = &mouth;
        mouth.next = node;
-
-        // Update the count. This may push us over the maximum node count.
-        node_count++;
        return true;
    }

-    /// Counts nodes.
-    size_t size(void) { return node_count; }
+    // Number of entries
+    size_t size() { return this->node_map.size(); }

-    /// Evicts all nodes.
    void evict_all_nodes(void) {
-        while (node_count > 0) {
+        while (this->size() > 0) {
            evict_last_node();
        }
    }

-    /// Iterator for walking nodes, from least recently used to most.
+    // Iterator for walking nodes, from least recently used to most.
    class iterator {
-        lru_node_t *node;
-
+        lru_link_t *node;
       public:
-        explicit iterator(lru_node_t *val) : node(val) {}
-        void operator++() { node = lru_cache_t::get_previous(node); }
+        typedef std::pair<const wcstring &, const CONTENTS &> value_type;
+
+        explicit iterator(lru_link_t *val) : node(val) {}
+        void operator++() { node = node->prev; }
        bool operator==(const iterator &other) { return node == other.node; }
        bool operator!=(const iterator &other) { return !(*this == other); }
-        node_type_t *operator*() { return static_cast<node_type_t *>(node); }
+        value_type operator*() const {
+            const lru_node_t *dnode = static_cast<const lru_node_t *>(node);
+            const wcstring &key = dnode->iter->first;
+            return {key, dnode->value};
+        }
    };

    iterator begin() { return iterator(mouth.prev); }