// Least-recently-used cache implementation. #ifndef FISH_LRU_H #define FISH_LRU_H #include #include #include "common.h" // Least-recently-used cache class. // // This a map from wcstring to CONTENTS, that will evict entries when the count exceeds the maximum. // It uses CRTP to inform clients when entries are evicted. This uses the classic LRU cache // structure: a dictionary mapping keys to nodes, where the nodes also form a linked list. Our // linked list is circular and has a sentinel node (the "mouth" - picture a snake swallowing its // tail). This simplifies the logic: no pointer is ever NULL! It also works well with C++'s iterator // since the sentinel node is a natural value for end(). Our nodes also have the unusual property of // having a "back pointer": they store an iterator to the entry in the map containing the node. This // allows us, given a node, to immediately locate the node and its key in the dictionary. This // allows us to avoid duplicating the key in the node. template class lru_cache_t { struct lru_node_t; struct lru_link_t { // Our doubly linked list // The base class is used for the mouth only lru_link_t *prev = NULL; lru_link_t *next = NULL; }; // The node type in our LRU cache struct lru_node_t : public lru_link_t { // No copying lru_node_t(const lru_node_t &) = delete; lru_node_t &operator=(const lru_node_t &) = delete; lru_node_t(lru_node_t &&) = default; // Our key in the map. This is owned by the map itself. const wcstring *key = NULL; // The value from the client CONTENTS value; explicit lru_node_t(const CONTENTS &v) : value(std::move(v)) {} }; typedef typename std::unordered_map::iterator node_iter_t; // Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is // used from background threads. const size_t max_node_count; // All of our nodes // Note that our linked list contains pointers to these nodes in the map // We are dependent on the iterator-noninvalidation guarantees of std::map std::unordered_map node_map; // Head of the linked list // The list is circular! // If "empty" the mouth just points at itself. lru_link_t mouth; // Take a node and move it to the front of the list void promote_node(lru_node_t *node) { assert(node != &mouth); // First unhook us node->prev->next = node->next; node->next->prev = node->prev; // Put us after the mouth node->next = mouth.next; node->next->prev = node; node->prev = &mouth; mouth.next = node; } // Remove the node void evict_node(lru_node_t *node) { // We should never evict the mouth. assert(node != &mouth && node != NULL && node->key != NULL); auto iter = this->node_map.find(*node->key); assert(iter != this->node_map.end()); // Remove it from the linked list. node->prev->next = node->next; node->next->prev = node->prev; // Pull out our key and value // Note we copy the key in case the map needs it to erase the node wcstring key = *node->key; CONTENTS value(std::move(node->value)); // Remove us from the map. This deallocates node! node_map.erase(iter); // Tell ourselves what we did DERIVED *dthis = static_cast(this); dthis->entry_was_evicted(std::move(key), std::move(value)); } // Evicts the last node void evict_last_node() { assert(mouth.prev != &mouth); evict_node(static_cast(mouth.prev)); } // CRTP callback for when a node is evicted. // Clients can implement this void entry_was_evicted(wcstring key, CONTENTS value) { UNUSED(key); UNUSED(value); } // Implementation of merge step for mergesort // Given two singly linked lists left and right, // and a binary func F implementing less-than, return // the list in sorted order template static lru_link_t *merge(lru_link_t *left, size_t left_len, lru_link_t *right, size_t right_len, const F &func) { assert(left_len > 0 && right_len > 0); auto popleft = [&]() { lru_link_t *ret = left; left = left->next; left_len--; return ret; }; auto popright = [&]() { lru_link_t *ret = right; right = right->next; right_len--; return ret; }; lru_link_t *head; lru_link_t **cursor = &head; while (left_len && right_len) { bool goleft = !func(static_cast(left)->value, static_cast(right)->value); *cursor = goleft ? popleft() : popright(); cursor = &(*cursor)->next; } while (left_len || right_len) { *cursor = left_len ? popleft() : popright(); cursor = &(*cursor)->next; } return head; } // mergesort the given list of the given length // This only sets the next pointers, not the prev ones template static lru_link_t *mergesort(lru_link_t *node, size_t length, const F &func) { if (length <= 1) { return node; } // divide us into two lists, left and right const size_t left_len = length / 2; const size_t right_len = length - left_len; lru_link_t *left = node; lru_link_t *right = node; for (size_t i = 0; i < left_len; i++) { right = right->next; } // Recursive sorting left = mergesort(left, left_len, func); right = mergesort(right, right_len, func); // Merge them return merge(left, left_len, right, right_len, func); } public: // Constructor. Note our linked list is always circular. explicit lru_cache_t(size_t max_size = 1024) : max_node_count(max_size) { mouth.next = mouth.prev = &mouth; } // Returns the value for a given key, or NULL. // This counts as a "use" and so promotes the node CONTENTS *get(const wcstring &key) { auto where = this->node_map.find(key); if (where == this->node_map.end()) { // not found return NULL; } promote_node(&where->second); return &where->second.value; } // Evicts the node for a given key, returning true if a node was evicted. bool evict_node(const wcstring &key) { auto where = this->node_map.find(key); if (where == this->node_map.end()) return false; evict_node(&where->second); return true; } // Adds a node under the given key. Returns true if the node was added, false if the node was // not because a node with that key is already in the set. bool insert(wcstring key, CONTENTS value) { if (!this->insert_no_eviction(std::move(key), std::move(value))) { return false; } while (this->node_map.size() > max_node_count) { evict_last_node(); } return true; } // Adds a node under the given key without triggering eviction. Returns true if the node was // added, false if the node was not because a node with that key is already in the set. bool insert_no_eviction(wcstring key, CONTENTS value) { // Try inserting; return false if it was already in the set. auto iter_inserted = this->node_map.emplace(std::move(key), lru_node_t(std::move(value))); if (!iter_inserted.second) { // already present - so promote it promote_node(&iter_inserted.first->second); return false; } // Tell the node where it is in the map node_iter_t iter = iter_inserted.first; lru_node_t *node = &iter->second; node->key = &iter->first; node->next = mouth.next; node->next->prev = node; node->prev = &mouth; mouth.next = node; return true; } // Number of entries size_t size() const { return this->node_map.size(); } // Given a binary function F implementing less-than on the contents, place the nodes in sorted // order. template void stable_sort(const F &func) { // Perform the sort. This sets forward pointers only size_t length = this->size(); if (length <= 1) { return; } lru_link_t *sorted = mergesort(this->mouth.next, length, func); mouth.next = sorted; // Go through and set back back pointers lru_link_t *cursor = sorted; lru_link_t *prev = &mouth; for (size_t i = 0; i < length; i++) { cursor->prev = prev; prev = cursor; cursor = cursor->next; } // prev is now last element in list // make the list circular prev->next = &mouth; mouth.prev = prev; } void evict_all_nodes(void) { while (this->size() > 0) { evict_last_node(); } } // Iterator for walking nodes, from least recently used to most. class iterator { const lru_link_t *node; public: typedef std::pair value_type; explicit iterator(const lru_link_t *val) : node(val) {} void operator++() { node = node->prev; } bool operator==(const iterator &other) const { return node == other.node; } bool operator!=(const iterator &other) const { return !(*this == other); } value_type operator*() const { const lru_node_t *dnode = static_cast(node); return {*dnode->key, dnode->value}; } }; iterator begin() const { return iterator(mouth.prev); } iterator end() const { return iterator(&mouth); } void check_sanity() const { // Check linked list sanity size_t expected_count = this->size(); const lru_link_t *prev = &mouth; const lru_link_t *cursor = mouth.next; size_t max = 1024 * 1024 * 64; size_t count = 0; while (cursor != &mouth) { if (cursor->prev != prev) { DIE("node busted previous link"); } prev = cursor; cursor = cursor->next; if (count++ > max) { DIE("LRU cache unable to re-reach the mouth - not circularly linked?"); } } if (mouth.prev != prev) { DIE("mouth.prev does not connect to last node"); } if (count != expected_count) { DIE("linked list count mismatch from map count"); } // Count iterators size_t iter_dist = 0; for (const auto &p : *this) { UNUSED(p); iter_dist++; } if (iter_dist != count) { DIE("linked list iterator mismatch from map count"); } } }; #endif