Prefix scan: db_bench and bug fixes

Summary: If use_prefix_filters is set and read_range>1, then the random seeks will set the prefix filter to be the prefix of the key which was randomly selected as the target. Still need to add statistics (perhaps in a separate diff).

Test Plan: ./db_bench --benchmarks=fillseq,prefixscanrandom --num=10000000 --statistics=1 --use_prefix_blooms=1 --use_prefix_api=1 --bloom_bits=10

Reviewers: dhruba

Reviewed By: dhruba

CC: leveldb, haobo

Differential Revision: https://reviews.facebook.net/D12273
This commit is contained in:
Tyler Harter
2013-08-22 16:06:50 -07:00
parent 60bf2b7d4a
commit c2bd8f4824
10 changed files with 164 additions and 16 deletions

View File

@@ -45,6 +45,7 @@ bool FilterBlockBuilder::SamePrefix(const Slice &key1,
}
void FilterBlockBuilder::AddKey(const Slice& key) {
// get slice for most recently added entry
Slice prev;
if (start_.size() > 0) {
size_t prev_start = start_[start_.size() - 1];
@@ -53,17 +54,21 @@ void FilterBlockBuilder::AddKey(const Slice& key) {
prev = Slice(base, length);
}
// add key to filter if needed
if (whole_key_filtering_) {
start_.push_back(entries_.size());
entries_.append(key.data(), key.size());
}
if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
// add prefix to filter if needed
Slice user_key = ExtractUserKey(key);
if (prefix_extractor_ && prefix_extractor_->InDomain(user_key)) {
// this assumes prefix(prefix(key)) == prefix(key), as the last
// entry in entries_ may be either a key or prefix, and we use
// prefix(last entry) to get the prefix of the last key.
if (prev.size() == 0 || ! SamePrefix(key, prev)) {
Slice prefix = prefix_extractor_->Transform(key);
if (prev.size() == 0 ||
!SamePrefix(user_key, ExtractUserKey(prev))) {
Slice prefix = prefix_extractor_->Transform(user_key);
InternalKey internal_prefix_tmp(prefix, 0, kTypeValue);
Slice internal_prefix = internal_prefix_tmp.Encode();
assert(comparator_->Compare(internal_prefix, key) <= 0);

View File

@@ -328,6 +328,11 @@ Iterator* Table::BlockReader(void* arg,
// 1) key.starts_with(prefix(key))
// 2) Compare(prefix(key), key) <= 0.
// 3) If Compare(key1, key2) <= 0, then Compare(prefix(key1), prefix(key2)) <= 0
//
// TODO(tylerharter): right now, this won't cause I/O since blooms are
// in memory. When blooms may need to be paged in, we should refactor so that
// this is only ever called lazily. In particular, this shouldn't be called
// while the DB lock is held like it is now.
bool Table::PrefixMayMatch(const Slice& internal_prefix) const {
FilterBlockReader* filter = rep_->filter;
bool may_match = true;
@@ -337,12 +342,14 @@ bool Table::PrefixMayMatch(const Slice& internal_prefix) const {
return true;
}
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
std::unique_ptr<Iterator> iiter(rep_->index_block->NewIterator(
rep_->options.comparator));
iiter->Seek(internal_prefix);
if (! iiter->Valid()) {
if (!iiter->Valid()) {
// we're past end of file
may_match = false;
} else if (iiter->key().starts_with(internal_prefix)) {
} else if (ExtractUserKey(iiter->key()).starts_with(
ExtractUserKey(internal_prefix))) {
// we need to check for this subtle case because our only
// guarantee is that "the key is a string >= last key in that data
// block" according to the doc/table_format.txt spec.
@@ -366,7 +373,12 @@ bool Table::PrefixMayMatch(const Slice& internal_prefix) const {
assert(s.ok());
may_match = filter->PrefixMayMatch(handle.offset(), internal_prefix);
}
delete iiter;
RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_CHECKED);
if (!may_match) {
RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_USEFUL);
}
return may_match;
}

View File

@@ -47,7 +47,7 @@ class Table {
~Table();
bool PrefixMayMatch(const Slice& prefix) const;
bool PrefixMayMatch(const Slice& internal_prefix) const;
// Returns a new iterator over the table contents.
// The result of NewIterator() is initially invalid (caller must