Merge branch 'master' into performance

Committed by kailiu on 2013-12-06 14:15:42 -08:00
37 changed files with 526 additions and 854 deletions

include/rocksdb/compaction_filter.h

@@ -40,6 +40,16 @@ class CompactionFilter {
// When the value is to be preserved, the application has the option
// to modify the existing_value and pass it back through new_value.
// value_changed needs to be set to true in this case.
//
// If multithreaded compaction is being used *and* a single CompactionFilter
// instance was supplied via Options::compaction_filter, this method may be
// called from different threads concurrently. The application must ensure
// that the call is thread-safe.
//
// If the CompactionFilter was created by a factory, then it will only ever
// be used by a single thread that is doing the compaction run, and this
// call does not need to be thread-safe. However, multiple filters may be
// in existence and operating concurrently.
virtual bool Filter(int level,
const Slice& key,
const Slice& existing_value,
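
For illustration, a minimal sketch of a filter that satisfies the stricter contract: it keeps no mutable state, so concurrent calls from multiple compaction threads are safe. The class name and its drop-empty-values rule are hypothetical, and the exact Filter signature (e.g. const-ness) may differ slightly between RocksDB versions.

#include <string>
#include "rocksdb/compaction_filter.h"
#include "rocksdb/slice.h"

// Hypothetical filter: drop any key whose value is empty. Stateless, so a
// single instance passed via Options::compaction_filter remains safe even
// when multithreaded compaction calls it from several threads at once.
class DropEmptyValueFilter : public rocksdb::CompactionFilter {
 public:
  virtual bool Filter(int level, const rocksdb::Slice& key,
                      const rocksdb::Slice& existing_value,
                      std::string* new_value, bool* value_changed) const {
    return existing_value.empty();  // returning true drops the entry
  }
  virtual const char* Name() const { return "DropEmptyValueFilter"; }
};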

include/rocksdb/db.h

@@ -273,7 +273,7 @@ class DB {
// Sets iter to an iterator that is positioned at a write-batch containing
// seq_number. If the sequence number is non-existent, it returns an iterator
// at the first available seq_no after the requested seq_no.
// Returns Status::Ok if iterator is valid
// Returns Status::OK if iterator is valid
// Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
// use this API, else the WAL files will get
// cleared aggressively and the iterator might keep getting invalid before
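
As a usage sketch (the iterator type and BatchResult members follow rocksdb/transaction_log.h; the WAL-retention settings must be configured as the comment above requires):

#include <memory>
#include "rocksdb/db.h"
#include "rocksdb/transaction_log.h"

// Walks every write batch from `since` onward. Assumes WAL_ttl_seconds /
// WAL_size_limit_MB were set large enough that the WAL files still exist.
void ReplayUpdates(rocksdb::DB* db, rocksdb::SequenceNumber since) {
  std::unique_ptr<rocksdb::TransactionLogIterator> iter;
  rocksdb::Status s = db->GetUpdatesSince(since, &iter);
  if (!s.ok()) return;  // e.g. the requested sequence was already purged
  for (; iter->Valid(); iter->Next()) {
    rocksdb::BatchResult batch = iter->GetBatch();
    // batch.sequence is the starting sequence number of this write batch;
    // batch.writeBatchPtr owns the WriteBatch itself.
  }
}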
@@ -292,6 +292,11 @@ class DB {
std::vector<LiveFileMetaData> *metadata) {
}
// Sets identity to the globally unique ID created at database creation time
// by invoking Env::GenerateUniqueId(). Returns Status::OK if identity could
// be set properly.
virtual Status GetDbIdentity(std::string& identity) = 0;
private:
// No copying allowed
DB(const DB&);
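
GetDbIdentity is straightforward to call; a minimal usage sketch:

#include <string>
#include "rocksdb/db.h"

// Reads back the unique ID that was generated when the database was created.
std::string GetId(rocksdb::DB* db) {
  std::string identity;
  rocksdb::Status s = db->GetDbIdentity(identity);
  return s.ok() ? identity : std::string();
}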

include/rocksdb/memtablerep.h

@@ -17,21 +17,13 @@
// The factory will be passed an Arena object when a new MemTableRep is
// requested. The API for this object is in rocksdb/arena.h.
//
// Users can implement their own memtable representations. We include four
// Users can implement their own memtable representations. We include three
// types built in:
// - SkipListRep: This is the default; it is backed by a skip list.
// - TransformRep: This is backed by a custom hash map.
// On construction, they are given a SliceTransform object. This
// object is applied to the user key of stored items; the result indexes into
// the hash map to yield a skiplist containing all records that share the
// same user key under the transform function.
// - UnsortedRep: A subclass of TransformRep where the transform function is
// the identity function. Optimized for point lookups.
// - PrefixHashRep: A subclass of TransformRep where the transform function is
// a fixed-size prefix extractor. If you use PrefixHashRepFactory, the transform
// must be identical to options.prefix_extractor, otherwise it will be discarded
// and the default will be used. It is optimized for ranged scans over a
// prefix.
// - HashSkipListRep: The memtable rep that is best used for keys that are
// structured like "prefix:suffix" where iteration within a prefix is
// common and iteration across different prefixes is rare. It is backed by
// a hash map where each bucket is a skip list.
// - VectorRep: This is backed by an unordered std::vector. On iteration, the
// vector is sorted. It is intelligent about sorting; once MarkReadOnly()
// has been called, the vector will only be sorted once. It is optimized for
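
To make the rep choice concrete, wiring up the hash-skiplist rep might look like the sketch below. Caveats: NewHashSkipListRepFactory's signature has changed across RocksDB versions (older variants take the transform directly, newer ones read it from options.prefix_extractor, which is also a shared_ptr in newer releases), and the prefix length of 8 is an arbitrary example.

#include "rocksdb/memtablerep.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"

// Sketch: configure the hash-skiplist memtable for "prefix:suffix" keys.
rocksdb::Options MakePrefixOptions() {
  rocksdb::Options options;
  // The transform given to the factory must match options.prefix_extractor.
  const rocksdb::SliceTransform* prefix = rocksdb::NewFixedPrefixTransform(8);
  options.prefix_extractor = prefix;  // raw pointer in this era of the API
  options.memtable_factory.reset(rocksdb::NewHashSkipListRepFactory(prefix));
  return options;
}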
@@ -186,16 +178,14 @@ public:
}
};
// TransformReps are backed by an unordered map of buffers to buckets. When
// looking up a key, the user key is extracted and a user-supplied transform
// function (see rocksdb/slice_transform.h) is applied to get the key into the
// unordered map. This allows the user to bin user keys based on arbitrary
// criteria. Two example implementations are UnsortedRepFactory and
// PrefixHashRepFactory.
// HashSkipListRep is backed by a hash map of buckets. Each bucket is a skip
// list. All the keys with the same prefix will be in the same bucket.
// The prefix is determined using a user-supplied SliceTransform, which has
// to match options.prefix_extractor.
//
// Iteration over the entire collection is implemented by dumping all the keys
// into an std::set. Thus, these data structures are best used when iteration
// over the entire collection is rare.
// into a separate skip list. Thus, these data structures are best used when
// iteration over the entire collection is rare.
//
// Parameters:
// transform: The SliceTransform to bucket user keys on. TransformRepFactory

include/rocksdb/options.h

@@ -15,11 +15,9 @@
#include <vector>
#include <stdint.h>
#include "rocksdb/memtablerep.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/statistics.h"
#include "rocksdb/table_properties.h"
#include "rocksdb/universal_compaction.h"
@@ -95,16 +93,33 @@ struct Options {
// Default: nullptr
shared_ptr<MergeOperator> merge_operator;
// The client must provide compaction_filter_factory if it requires a new
// compaction filter to be used for different compaction processes
// A single CompactionFilter instance to call into during compaction.
// Allows an application to modify/delete a key-value during background
// compaction.
//
// If the client requires a new compaction filter to be used for different
// compaction runs, it can specify compaction_filter_factory instead of this
// option. The client should specify only one of the two.
// compaction_filter takes precedence over compaction_filter_factory if the
// client specifies both.
//
// If multithreaded compaction is being used, the supplied CompactionFilter
// instance may be used from different threads concurrently and so should be
// thread-safe.
//
// Default: nullptr
const CompactionFilter* compaction_filter;
// This is a factory that provides compaction filter objects which allow
// an application to modify/delete a key-value during background compaction.
//
// A new filter will be created on each compaction run. If multithreaded
// compaction is being used, each created CompactionFilter will only be used
// from a single thread and so does not need to be thread-safe.
//
// Default: a factory that doesn't provide any object
std::shared_ptr<CompactionFilterFactory> compaction_filter_factory;
// If true, the database will be created if it is missing.
// Default: false
bool create_if_missing;
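
A sketch of the factory route, reusing the hypothetical filter from the compaction_filter.h example above. The argument list of CreateCompactionFilter has varied across RocksDB versions (some take a context struct); the no-argument form is shown here.

#include <memory>
#include "rocksdb/compaction_filter.h"
#include "rocksdb/options.h"

// Each compaction run gets its own filter instance from the factory, so the
// filter never needs to be thread-safe even under multithreaded compaction.
class DropEmptyValueFilterFactory : public rocksdb::CompactionFilterFactory {
 public:
  virtual std::unique_ptr<rocksdb::CompactionFilter> CreateCompactionFilter() {
    return std::unique_ptr<rocksdb::CompactionFilter>(
        new DropEmptyValueFilter());  // hypothetical filter sketched earlier
  }
  virtual const char* Name() const { return "DropEmptyValueFilterFactory"; }
};

// Usage:
//   options.compaction_filter_factory.reset(new DropEmptyValueFilterFactory());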
@@ -602,11 +617,6 @@ struct Options {
// Table and TableBuilder.
std::shared_ptr<TableFactory> table_factory;
// This is a factory that provides compaction filter objects which allow
// an application to modify/delete a key-value during background compaction.
// Default: a factory that doesn't provide any object
std::shared_ptr<CompactionFilterFactory> compaction_filter_factory;
// This option allows the user to collect their own statistics of interest
// about the tables.
// Default: empty vector -- no user-defined statistics collection will be

include/rocksdb/statistics.h

@@ -51,6 +51,11 @@ enum Tickers {
// # of times bloom filter has avoided file reads.
BLOOM_FILTER_USEFUL,
// # of memtable hits.
MEMTABLE_HIT,
// # of memtable misses.
MEMTABLE_MISS,
/**
* COMPACTION_KEY_DROP_* count the reasons for key drop during compaction
* There are 3 reasons currently.
@@ -125,6 +130,8 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{ BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss" },
{ BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit" },
{ BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful" },
{ MEMTABLE_HIT, "rocksdb.memtable.hit" },
{ MEMTABLE_MISS, "rocksdb.memtable.miss" },
{ COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new" },
{ COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete" },
{ COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user" },
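
To see the new tickers move, enable statistics and read them back after a Get that is served from the memtable. A sketch (the database path is a placeholder; CreateDBStatistics and getTickerCount come from rocksdb/statistics.h):

#include <iostream>
#include <string>
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/ticker_demo", &db);
  if (!s.ok()) return 1;

  std::string value;
  db->Put(rocksdb::WriteOptions(), "key", "value");
  db->Get(rocksdb::ReadOptions(), "key", &value);  // served from the memtable

  std::cout << "memtable hits: "
            << options.statistics->getTickerCount(rocksdb::MEMTABLE_HIT)
            << ", misses: "
            << options.statistics->getTickerCount(rocksdb::MEMTABLE_MISS)
            << std::endl;
  delete db;
  return 0;
}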