Squashed 'src/rocksdb/' content from commit 224932d

git-subtree-dir: src/rocksdb git-subtree-split: 224932d4d0b561712107d747c662df181c39644d
2025-12-06 17:27:52 +00:00 · 2014-08-08 11:57:41 -07:00
commit f86d9fd626
435 changed files with 123706 additions and 0 deletions
--- a/util/cache.cc
+++ b/util/cache.cc
@@ -0,0 +1,481 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "rocksdb/cache.h"
+#include "port/port.h"
+#include "util/autovector.h"
+#include "util/hash.h"
+#include "util/mutexlock.h"
+
+namespace rocksdb {
+
+Cache::~Cache() {
+}
+
+namespace {
+
+// LRU cache implementation
+
+// An entry is a variable length heap-allocated structure.  Entries
+// are kept in a circular doubly linked list ordered by access time.
+struct LRUHandle {
+  void* value;
+  void (*deleter)(const Slice&, void* value);
+  LRUHandle* next_hash;
+  LRUHandle* next;
+  LRUHandle* prev;
+  size_t charge;      // TODO(opt): Only allow uint32_t?
+  size_t key_length;
+  uint32_t refs;
+  uint32_t hash;      // Hash of key(); used for fast sharding and comparisons
+  char key_data[1];   // Beginning of key
+
+  Slice key() const {
+    // For cheaper lookups, we allow a temporary Handle object
+    // to store a pointer to a key in "value".
+    if (next == this) {
+      return *(reinterpret_cast<Slice*>(value));
+    } else {
+      return Slice(key_data, key_length);
+    }
+  }
+};
+
+// We provide our own simple hash table since it removes a whole bunch
+// of porting hacks and is also faster than some of the built-in hash
+// table implementations in some of the compiler/runtime combinations
+// we have tested.  E.g., readrandom speeds up by ~5% over the g++
+// 4.4.3's builtin hashtable.
+class HandleTable {
+ public:
+  HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
+  ~HandleTable() { delete[] list_; }
+
+  LRUHandle* Lookup(const Slice& key, uint32_t hash) {
+    return *FindPointer(key, hash);
+  }
+
+  LRUHandle* Insert(LRUHandle* h) {
+    LRUHandle** ptr = FindPointer(h->key(), h->hash);
+    LRUHandle* old = *ptr;
+    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
+    *ptr = h;
+    if (old == nullptr) {
+      ++elems_;
+      if (elems_ > length_) {
+        // Since each cache entry is fairly large, we aim for a small
+        // average linked list length (<= 1).
+        Resize();
+      }
+    }
+    return old;
+  }
+
+  LRUHandle* Remove(const Slice& key, uint32_t hash) {
+    LRUHandle** ptr = FindPointer(key, hash);
+    LRUHandle* result = *ptr;
+    if (result != nullptr) {
+      *ptr = result->next_hash;
+      --elems_;
+    }
+    return result;
+  }
+
+ private:
+  // The table consists of an array of buckets where each bucket is
+  // a linked list of cache entries that hash into the bucket.
+  uint32_t length_;
+  uint32_t elems_;
+  LRUHandle** list_;
+
+  // Return a pointer to slot that points to a cache entry that
+  // matches key/hash.  If there is no such cache entry, return a
+  // pointer to the trailing slot in the corresponding linked list.
+  LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
+    LRUHandle** ptr = &list_[hash & (length_ - 1)];
+    while (*ptr != nullptr &&
+           ((*ptr)->hash != hash || key != (*ptr)->key())) {
+      ptr = &(*ptr)->next_hash;
+    }
+    return ptr;
+  }
+
+  void Resize() {
+    uint32_t new_length = 16;
+    while (new_length < elems_ * 1.5) {
+      new_length *= 2;
+    }
+    LRUHandle** new_list = new LRUHandle*[new_length];
+    memset(new_list, 0, sizeof(new_list[0]) * new_length);
+    uint32_t count = 0;
+    for (uint32_t i = 0; i < length_; i++) {
+      LRUHandle* h = list_[i];
+      while (h != nullptr) {
+        LRUHandle* next = h->next_hash;
+        uint32_t hash = h->hash;
+        LRUHandle** ptr = &new_list[hash & (new_length - 1)];
+        h->next_hash = *ptr;
+        *ptr = h;
+        h = next;
+        count++;
+      }
+    }
+    assert(elems_ == count);
+    delete[] list_;
+    list_ = new_list;
+    length_ = new_length;
+  }
+};
+
+// A single shard of sharded cache.
+class LRUCache {
+ public:
+  LRUCache();
+  ~LRUCache();
+
+  // Separate from constructor so caller can easily make an array of LRUCache
+  void SetCapacity(size_t capacity) { capacity_ = capacity; }
+  void SetRemoveScanCountLimit(size_t remove_scan_count_limit) {
+    remove_scan_count_limit_ = remove_scan_count_limit;
+  }
+
+  // Like Cache methods, but with an extra "hash" parameter.
+  Cache::Handle* Insert(const Slice& key, uint32_t hash,
+                        void* value, size_t charge,
+                        void (*deleter)(const Slice& key, void* value));
+  Cache::Handle* Lookup(const Slice& key, uint32_t hash);
+  void Release(Cache::Handle* handle);
+  void Erase(const Slice& key, uint32_t hash);
+  // Although in some platforms the update of size_t is atomic, to make sure
+  // GetUsage() works correctly under any platforms, we'll protect this
+  // function with mutex.
+  size_t GetUsage() const {
+    MutexLock l(&mutex_);
+    return usage_;
+  }
+
+  void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
+                              bool thread_safe);
+
+ private:
+  void LRU_Remove(LRUHandle* e);
+  void LRU_Append(LRUHandle* e);
+  // Just reduce the reference count by 1.
+  // Return true if last reference
+  bool Unref(LRUHandle* e);
+  // Call deleter and free
+  void FreeEntry(LRUHandle* e);
+
+  // Initialized before use.
+  size_t capacity_;
+  uint32_t remove_scan_count_limit_;
+
+  // mutex_ protects the following state.
+  // We don't count mutex_ as the cache's internal state so semantically we
+  // don't mind mutex_ invoking the non-const actions.
+  mutable port::Mutex mutex_;
+  size_t usage_;
+
+  // Dummy head of LRU list.
+  // lru.prev is newest entry, lru.next is oldest entry.
+  LRUHandle lru_;
+
+  HandleTable table_;
+};
+
+LRUCache::LRUCache()
+    : usage_(0) {
+  // Make empty circular linked list
+  lru_.next = &lru_;
+  lru_.prev = &lru_;
+}
+
+LRUCache::~LRUCache() {
+  for (LRUHandle* e = lru_.next; e != &lru_; ) {
+    LRUHandle* next = e->next;
+    assert(e->refs == 1);  // Error if caller has an unreleased handle
+    if (Unref(e)) {
+      FreeEntry(e);
+    }
+    e = next;
+  }
+}
+
+bool LRUCache::Unref(LRUHandle* e) {
+  assert(e->refs > 0);
+  e->refs--;
+  return e->refs == 0;
+}
+
+void LRUCache::FreeEntry(LRUHandle* e) {
+  assert(e->refs == 0);
+  (*e->deleter)(e->key(), e->value);
+  free(e);
+}
+
+void LRUCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
+                                      bool thread_safe) {
+  if (thread_safe) {
+    mutex_.Lock();
+  }
+  for (auto e = lru_.next; e != &lru_; e = e->next) {
+    callback(e->value, e->charge);
+  }
+  if (thread_safe) {
+    mutex_.Unlock();
+  }
+}
+
+void LRUCache::LRU_Remove(LRUHandle* e) {
+  e->next->prev = e->prev;
+  e->prev->next = e->next;
+  usage_ -= e->charge;
+}
+
+void LRUCache::LRU_Append(LRUHandle* e) {
+  // Make "e" newest entry by inserting just before lru_
+  e->next = &lru_;
+  e->prev = lru_.prev;
+  e->prev->next = e;
+  e->next->prev = e;
+  usage_ += e->charge;
+}
+
+Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
+  MutexLock l(&mutex_);
+  LRUHandle* e = table_.Lookup(key, hash);
+  if (e != nullptr) {
+    e->refs++;
+    LRU_Remove(e);
+    LRU_Append(e);
+  }
+  return reinterpret_cast<Cache::Handle*>(e);
+}
+
+void LRUCache::Release(Cache::Handle* handle) {
+  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
+  bool last_reference = false;
+  {
+    MutexLock l(&mutex_);
+    last_reference = Unref(e);
+  }
+  if (last_reference) {
+    FreeEntry(e);
+  }
+}
+
+Cache::Handle* LRUCache::Insert(
+    const Slice& key, uint32_t hash, void* value, size_t charge,
+    void (*deleter)(const Slice& key, void* value)) {
+
+  LRUHandle* e = reinterpret_cast<LRUHandle*>(
+      malloc(sizeof(LRUHandle)-1 + key.size()));
+  autovector<LRUHandle*> last_reference_list;
+
+  e->value = value;
+  e->deleter = deleter;
+  e->charge = charge;
+  e->key_length = key.size();
+  e->hash = hash;
+  e->refs = 2;  // One from LRUCache, one for the returned handle
+  memcpy(e->key_data, key.data(), key.size());
+
+  {
+    MutexLock l(&mutex_);
+
+    LRU_Append(e);
+
+    LRUHandle* old = table_.Insert(e);
+    if (old != nullptr) {
+      LRU_Remove(old);
+      if (Unref(old)) {
+        last_reference_list.push_back(old);
+      }
+    }
+
+    if (remove_scan_count_limit_ > 0) {
+      // Try to free the space by evicting the entries that are only
+      // referenced by the cache first.
+      LRUHandle* cur = lru_.next;
+      for (unsigned int scanCount = 0;
+           usage_ > capacity_ && cur != &lru_
+           && scanCount < remove_scan_count_limit_; scanCount++) {
+        LRUHandle* next = cur->next;
+        if (cur->refs <= 1) {
+          LRU_Remove(cur);
+          table_.Remove(cur->key(), cur->hash);
+          if (Unref(cur)) {
+            last_reference_list.push_back(cur);
+          }
+        }
+        cur = next;
+      }
+    }
+
+    // Free the space following strict LRU policy until enough space
+    // is freed.
+    while (usage_ > capacity_ && lru_.next != &lru_) {
+      LRUHandle* old = lru_.next;
+      LRU_Remove(old);
+      table_.Remove(old->key(), old->hash);
+      if (Unref(old)) {
+        last_reference_list.push_back(old);
+      }
+    }
+  }
+
+  // we free the entries here outside of mutex for
+  // performance reasons
+  for (auto entry : last_reference_list) {
+    FreeEntry(entry);
+  }
+
+  return reinterpret_cast<Cache::Handle*>(e);
+}
+
+void LRUCache::Erase(const Slice& key, uint32_t hash) {
+  LRUHandle* e;
+  bool last_reference = false;
+  {
+    MutexLock l(&mutex_);
+    e = table_.Remove(key, hash);
+    if (e != nullptr) {
+      LRU_Remove(e);
+      last_reference = Unref(e);
+    }
+  }
+  // mutex not held here
+  // last_reference will only be true if e != nullptr
+  if (last_reference) {
+    FreeEntry(e);
+  }
+}
+
+static int kNumShardBits = 4;          // default values, can be overridden
+static int kRemoveScanCountLimit = 0; // default values, can be overridden
+
+class ShardedLRUCache : public Cache {
+ private:
+  LRUCache* shards_;
+  port::Mutex id_mutex_;
+  uint64_t last_id_;
+  int num_shard_bits_;
+  size_t capacity_;
+
+  static inline uint32_t HashSlice(const Slice& s) {
+    return Hash(s.data(), s.size(), 0);
+  }
+
+  uint32_t Shard(uint32_t hash) {
+    // Note, hash >> 32 yields hash in gcc, not the zero we expect!
+    return (num_shard_bits_ > 0) ? (hash >> (32 - num_shard_bits_)) : 0;
+  }
+
+  void init(size_t capacity, int numbits, int removeScanCountLimit) {
+    num_shard_bits_ = numbits;
+    capacity_ = capacity;
+    int num_shards = 1 << num_shard_bits_;
+    shards_ = new LRUCache[num_shards];
+    const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
+    for (int s = 0; s < num_shards; s++) {
+      shards_[s].SetCapacity(per_shard);
+      shards_[s].SetRemoveScanCountLimit(removeScanCountLimit);
+    }
+  }
+
+ public:
+  explicit ShardedLRUCache(size_t capacity)
+      : last_id_(0) {
+    init(capacity, kNumShardBits, kRemoveScanCountLimit);
+  }
+  ShardedLRUCache(size_t capacity, int num_shard_bits,
+                  int removeScanCountLimit)
+     : last_id_(0) {
+    init(capacity, num_shard_bits, removeScanCountLimit);
+  }
+  virtual ~ShardedLRUCache() {
+    delete[] shards_;
+  }
+  virtual Handle* Insert(const Slice& key, void* value, size_t charge,
+                         void (*deleter)(const Slice& key, void* value)) {
+    const uint32_t hash = HashSlice(key);
+    return shards_[Shard(hash)].Insert(key, hash, value, charge, deleter);
+  }
+  virtual Handle* Lookup(const Slice& key) {
+    const uint32_t hash = HashSlice(key);
+    return shards_[Shard(hash)].Lookup(key, hash);
+  }
+  virtual void Release(Handle* handle) {
+    LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
+    shards_[Shard(h->hash)].Release(handle);
+  }
+  virtual void Erase(const Slice& key) {
+    const uint32_t hash = HashSlice(key);
+    shards_[Shard(hash)].Erase(key, hash);
+  }
+  virtual void* Value(Handle* handle) {
+    return reinterpret_cast<LRUHandle*>(handle)->value;
+  }
+  virtual uint64_t NewId() {
+    MutexLock l(&id_mutex_);
+    return ++(last_id_);
+  }
+  virtual size_t GetCapacity() const {
+    return capacity_;
+  }
+
+  virtual size_t GetUsage() const {
+    // We will not lock the cache when getting the usage from shards.
+    // for (size_t i = 0; i < num_shard_bits_; ++i)
+    int num_shards = 1 << num_shard_bits_;
+    size_t usage = 0;
+    for (int s = 0; s < num_shards; s++) {
+      usage += shards_[s].GetUsage();
+    }
+    return usage;
+  }
+
+  virtual void DisownData() {
+    shards_ = nullptr;
+  }
+
+  virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
+                                      bool thread_safe) override {
+    int num_shards = 1 << num_shard_bits_;
+    for (int s = 0; s < num_shards; s++) {
+      shards_[s].ApplyToAllCacheEntries(callback, thread_safe);
+    }
+  }
+};
+
+}  // end anonymous namespace
+
+shared_ptr<Cache> NewLRUCache(size_t capacity) {
+  return NewLRUCache(capacity, kNumShardBits);
+}
+
+shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits) {
+  return NewLRUCache(capacity, num_shard_bits, kRemoveScanCountLimit);
+}
+
+shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
+                              int removeScanCountLimit) {
+  if (num_shard_bits >= 20) {
+    return nullptr;  // the cache cannot be sharded into too many fine pieces
+  }
+  return std::make_shared<ShardedLRUCache>(capacity,
+                                           num_shard_bits,
+                                           removeScanCountLimit);
+}
+
+}  // namespace rocksdb