mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
Prefix filters for scans (v4)
Summary: Similar to v2 (db and table code understands prefixes), but use ReadOptions as in v3. Also, make the CreateFilter code faster and cleaner. Test Plan: make db_test; export LEVELDB_TESTS=PrefixScan; ./db_test Reviewers: dhruba Reviewed By: dhruba CC: haobo, emayanke Differential Revision: https://reviews.facebook.net/D12027
This commit is contained in:
@@ -23,6 +23,7 @@
|
||||
#include "db/memtable.h"
|
||||
#include "db/memtablelist.h"
|
||||
#include "db/merge_helper.h"
|
||||
#include "db/prefix_filter_iterator.h"
|
||||
#include "db/table_cache.h"
|
||||
#include "db/version_set.h"
|
||||
#include "db/write_batch_internal.h"
|
||||
@@ -2339,12 +2340,19 @@ bool DBImpl::KeyMayExist(const ReadOptions& options,
|
||||
|
||||
Iterator* DBImpl::NewIterator(const ReadOptions& options) {
|
||||
SequenceNumber latest_snapshot;
|
||||
Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
|
||||
return NewDBIterator(
|
||||
&dbname_, env_, options_, user_comparator(), internal_iter,
|
||||
(options.snapshot != nullptr
|
||||
? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
|
||||
: latest_snapshot));
|
||||
Iterator* iter = NewInternalIterator(options, &latest_snapshot);
|
||||
iter = NewDBIterator(
|
||||
&dbname_, env_, options_, user_comparator(), iter,
|
||||
(options.snapshot != nullptr
|
||||
? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
|
||||
: latest_snapshot));
|
||||
if (options.prefix) {
|
||||
// use extra wrapper to exclude any keys from the results which
|
||||
// don't begin with the prefix
|
||||
iter = new PrefixFilterIterator(iter, *options.prefix,
|
||||
options_.prefix_extractor);
|
||||
}
|
||||
return iter;
|
||||
}
|
||||
|
||||
const Snapshot* DBImpl::GetSnapshot() {
|
||||
|
||||
129
db/db_test.cc
129
db/db_test.cc
@@ -3640,6 +3640,135 @@ TEST(DBTest, MultiGetEmpty) {
|
||||
} while (ChangeCompactOptions());
|
||||
}
|
||||
|
||||
void PrefixScanInit(DBTest *dbtest) {
|
||||
char buf[100];
|
||||
std::string keystr;
|
||||
const int small_range_sstfiles = 5;
|
||||
const int big_range_sstfiles = 5;
|
||||
|
||||
// Generate 11 sst files with the following prefix ranges.
|
||||
// GROUP 0: [0,10] (level 1)
|
||||
// GROUP 1: [1,2], [2,3], [3,4], [4,5], [5, 6] (level 0)
|
||||
// GROUP 2: [0,6], [0,7], [0,8], [0,9], [0,10] (level 0)
|
||||
//
|
||||
// A seek with the previous API would do 11 random I/Os (to all the
|
||||
// files). With the new API and a prefix filter enabled, we should
|
||||
// only do 2 random I/O, to the 2 files containing the key.
|
||||
|
||||
// GROUP 0
|
||||
snprintf(buf, sizeof(buf), "%02d______:start", 0);
|
||||
keystr = std::string(buf);
|
||||
ASSERT_OK(dbtest->Put(keystr, keystr));
|
||||
snprintf(buf, sizeof(buf), "%02d______:end", 10);
|
||||
keystr = std::string(buf);
|
||||
ASSERT_OK(dbtest->Put(keystr, keystr));
|
||||
dbtest->dbfull()->TEST_CompactMemTable();
|
||||
dbtest->dbfull()->CompactRange(nullptr, nullptr); // move to level 1
|
||||
|
||||
// GROUP 1
|
||||
for (int i = 1; i <= small_range_sstfiles; i++) {
|
||||
snprintf(buf, sizeof(buf), "%02d______:start", i);
|
||||
keystr = std::string(buf);
|
||||
ASSERT_OK(dbtest->Put(keystr, keystr));
|
||||
snprintf(buf, sizeof(buf), "%02d______:end", i+1);
|
||||
keystr = std::string(buf);
|
||||
ASSERT_OK(dbtest->Put(keystr, keystr));
|
||||
dbtest->dbfull()->TEST_CompactMemTable();
|
||||
}
|
||||
|
||||
// GROUP 2
|
||||
for (int i = 1; i <= big_range_sstfiles; i++) {
|
||||
std::string keystr;
|
||||
snprintf(buf, sizeof(buf), "%02d______:start", 0);
|
||||
keystr = std::string(buf);
|
||||
ASSERT_OK(dbtest->Put(keystr, keystr));
|
||||
snprintf(buf, sizeof(buf), "%02d______:end",
|
||||
small_range_sstfiles+i+1);
|
||||
keystr = std::string(buf);
|
||||
ASSERT_OK(dbtest->Put(keystr, keystr));
|
||||
dbtest->dbfull()->TEST_CompactMemTable();
|
||||
}
|
||||
}
|
||||
|
||||
TEST(DBTest, PrefixScan) {
|
||||
ReadOptions ro = ReadOptions();
|
||||
int count;
|
||||
Slice prefix;
|
||||
Slice key;
|
||||
char buf[100];
|
||||
Iterator* iter;
|
||||
snprintf(buf, sizeof(buf), "03______:");
|
||||
prefix = Slice(buf, 8);
|
||||
key = Slice(buf, 9);
|
||||
|
||||
// db configs
|
||||
env_->count_random_reads_ = true;
|
||||
Options options = CurrentOptions();
|
||||
options.env = env_;
|
||||
options.block_cache = NewLRUCache(0); // Prevent cache hits
|
||||
options.filter_policy = NewBloomFilterPolicy(10);
|
||||
options.prefix_extractor = NewFixedPrefixTransform(8);
|
||||
options.whole_key_filtering = false;
|
||||
options.disable_auto_compactions = true;
|
||||
options.max_background_compactions = 2;
|
||||
options.create_if_missing = true;
|
||||
options.disable_seek_compaction = true;
|
||||
|
||||
// prefix specified, with blooms: 2 RAND I/Os
|
||||
// SeekToFirst
|
||||
DestroyAndReopen(&options);
|
||||
PrefixScanInit(this);
|
||||
count = 0;
|
||||
env_->random_read_counter_.Reset();
|
||||
ro.prefix = &prefix;
|
||||
iter = db_->NewIterator(ro);
|
||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
assert(iter->key().starts_with(prefix));
|
||||
count++;
|
||||
}
|
||||
ASSERT_TRUE(iter->status().ok());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
|
||||
|
||||
// prefix specified, with blooms: 2 RAND I/Os
|
||||
// Seek
|
||||
DestroyAndReopen(&options);
|
||||
PrefixScanInit(this);
|
||||
count = 0;
|
||||
env_->random_read_counter_.Reset();
|
||||
ro.prefix = &prefix;
|
||||
iter = db_->NewIterator(ro);
|
||||
for (iter->Seek(key); iter->Valid(); iter->Next()) {
|
||||
assert(iter->key().starts_with(prefix));
|
||||
count++;
|
||||
}
|
||||
ASSERT_TRUE(iter->status().ok());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
|
||||
|
||||
// no prefix specified: 11 RAND I/Os
|
||||
DestroyAndReopen(&options);
|
||||
PrefixScanInit(this);
|
||||
count = 0;
|
||||
env_->random_read_counter_.Reset();
|
||||
iter = db_->NewIterator(ReadOptions());
|
||||
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
|
||||
if (! iter->key().starts_with(prefix)) {
|
||||
break;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
ASSERT_TRUE(iter->status().ok());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 11);
|
||||
Close();
|
||||
delete options.filter_policy;
|
||||
delete options.prefix_extractor;
|
||||
}
|
||||
|
||||
std::string MakeKey(unsigned int num) {
|
||||
char buf[30];
|
||||
snprintf(buf, sizeof(buf), "%016u", num);
|
||||
|
||||
76
db/prefix_filter_iterator.h
Normal file
76
db/prefix_filter_iterator.h
Normal file
@@ -0,0 +1,76 @@
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
//
|
||||
// Wrap an underlying iterator, but exclude any results not starting
|
||||
// with a given prefix. Seeking to keys not beginning with the prefix
|
||||
// is invalid, and SeekToLast is not implemented (that would be
|
||||
// non-trivial), but otherwise this iterator will behave just like the
|
||||
// underlying iterator would if there happened to be no non-matching
|
||||
// keys in the dataset.
|
||||
|
||||
#ifndef STORAGE_LEVELDB_DB_PREFIX_FILTER_ITERATOR_H_
|
||||
#define STORAGE_LEVELDB_DB_PREFIX_FILTER_ITERATOR_H_
|
||||
|
||||
#include "leveldb/iterator.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
class PrefixFilterIterator : public Iterator {
|
||||
private:
|
||||
Iterator* iter_;
|
||||
const Slice &prefix_;
|
||||
const SliceTransform *prefix_extractor_;
|
||||
Status status_;
|
||||
|
||||
public:
|
||||
PrefixFilterIterator(Iterator* iter,
|
||||
const Slice &prefix,
|
||||
const SliceTransform* prefix_extractor)
|
||||
: iter_(iter), prefix_(prefix),
|
||||
prefix_extractor_(prefix_extractor),
|
||||
status_(Status::OK()) {
|
||||
if (prefix_extractor == nullptr) {
|
||||
status_ = Status::InvalidArgument("A prefix filter may not be used "
|
||||
"unless a function is also defined "
|
||||
"for extracting prefixes");
|
||||
} else if (!prefix_extractor_->InRange(prefix)) {
|
||||
status_ = Status::InvalidArgument("Must provide a slice for prefix which"
|
||||
"is a prefix for some key");
|
||||
}
|
||||
}
|
||||
~PrefixFilterIterator() {
|
||||
delete iter_;
|
||||
}
|
||||
Slice key() const { return iter_->key(); }
|
||||
Slice value() const { return iter_->value(); }
|
||||
Status status() const {
|
||||
if (!status_.ok()) {
|
||||
return status_;
|
||||
}
|
||||
return iter_->status();
|
||||
}
|
||||
void Next() { iter_->Next(); }
|
||||
void Prev() { iter_->Prev(); }
|
||||
void Seek(const Slice& k) {
|
||||
if (prefix_extractor_->Transform(k) == prefix_) {
|
||||
iter_->Seek(k);
|
||||
} else {
|
||||
status_ = Status::InvalidArgument("Seek must begin with target prefix");
|
||||
}
|
||||
}
|
||||
void SeekToFirst() {
|
||||
Seek(prefix_);
|
||||
}
|
||||
void SeekToLast() {
|
||||
status_ = Status::NotSupported("SeekToLast is incompatible with prefixes");
|
||||
}
|
||||
bool Valid() const {
|
||||
return (status_.ok() && iter_->Valid() &&
|
||||
prefix_extractor_->Transform(iter_->key()) == prefix_);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user