Prefix scan: db_bench and bug fixes

Summary: If use_prefix_filters is set and read_range>1, then the random seeks will set the prefix filter to the prefix of the randomly selected target key. Still need to add statistics (perhaps in a separate diff). Test Plan: ./db_bench --benchmarks=fillseq,prefixscanrandom --num=10000000 --statistics=1 --use_prefix_blooms=1 --use_prefix_api=1 --bloom_bits=10 Reviewers: dhruba Reviewed By: dhruba CC: leveldb, haobo Differential Revision: https://reviews.facebook.net/D12273
2025-12-06 17:27:55 +00:00 · 2013-08-22 16:06:50 -07:00
parent 60bf2b7d4a
commit c2bd8f4824
10 changed files with 164 additions and 16 deletions
--- a/table/filter_block.cc
+++ b/table/filter_block.cc
@@ -45,6 +45,7 @@ bool FilterBlockBuilder::SamePrefix(const Slice &key1,
 }

 void FilterBlockBuilder::AddKey(const Slice& key) {
+  // get slice for most recently added entry
  Slice prev;
  if (start_.size() > 0) {
    size_t prev_start = start_[start_.size() - 1];
@@ -53,17 +54,21 @@ void FilterBlockBuilder::AddKey(const Slice& key) {
    prev = Slice(base, length);
  }

+  // add key to filter if needed
  if (whole_key_filtering_) {
    start_.push_back(entries_.size());
    entries_.append(key.data(), key.size());
  }

-  if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
+  // add prefix to filter if needed
+  Slice user_key = ExtractUserKey(key);
+  if (prefix_extractor_ && prefix_extractor_->InDomain(user_key)) {
    // this assumes prefix(prefix(key)) == prefix(key), as the last
    // entry in entries_ may be either a key or prefix, and we use
    // prefix(last entry) to get the prefix of the last key.
-    if (prev.size() == 0 || ! SamePrefix(key, prev)) {
-      Slice prefix = prefix_extractor_->Transform(key);
+    if (prev.size() == 0 ||
+        !SamePrefix(user_key, ExtractUserKey(prev))) {
+      Slice prefix = prefix_extractor_->Transform(user_key);
      InternalKey internal_prefix_tmp(prefix, 0, kTypeValue);
      Slice internal_prefix = internal_prefix_tmp.Encode();
      assert(comparator_->Compare(internal_prefix, key) <= 0);
--- a/table/table.cc
+++ b/table/table.cc
@@ -328,6 +328,11 @@ Iterator* Table::BlockReader(void* arg,
 // 1) key.starts_with(prefix(key))
 // 2) Compare(prefix(key), key) <= 0.
 // 3) If Compare(key1, key2) <= 0, then Compare(prefix(key1), prefix(key2)) <= 0
+//
+// TODO(tylerharter): right now, this won't cause I/O since blooms are
+// in memory.  When blooms may need to be paged in, we should refactor so that
+// this is only ever called lazily.  In particular, this shouldn't be called
+// while the DB lock is held like it is now.
 bool Table::PrefixMayMatch(const Slice& internal_prefix) const {
  FilterBlockReader* filter = rep_->filter;
  bool may_match = true;
@@ -337,12 +342,14 @@ bool Table::PrefixMayMatch(const Slice& internal_prefix) const {
    return true;
  }

-  Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
+  std::unique_ptr<Iterator> iiter(rep_->index_block->NewIterator(
+                                           rep_->options.comparator));
  iiter->Seek(internal_prefix);
-  if (! iiter->Valid()) {
+  if (!iiter->Valid()) {
    // we're past end of file
    may_match = false;
-  } else if (iiter->key().starts_with(internal_prefix)) {
+  } else if (ExtractUserKey(iiter->key()).starts_with(
+                                             ExtractUserKey(internal_prefix))) {
    // we need to check for this subtle case because our only
    // guarantee is that "the key is a string >= last key in that data
    // block" according to the doc/table_format.txt spec.
@@ -366,7 +373,12 @@ bool Table::PrefixMayMatch(const Slice& internal_prefix) const {
    assert(s.ok());
    may_match = filter->PrefixMayMatch(handle.offset(), internal_prefix);
  }
-  delete iiter;
+
+  RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_CHECKED);
+  if (!may_match) {
+    RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_USEFUL);
+  }
+
  return may_match;
 }

--- a/table/table.h
+++ b/table/table.h
@@ -47,7 +47,7 @@ class Table {

  ~Table();

-  bool PrefixMayMatch(const Slice& prefix) const;
+  bool PrefixMayMatch(const Slice& internal_prefix) const;

  // Returns a new iterator over the table contents.
  // The result of NewIterator() is initially invalid (caller must