Follow-up Cleaning-up After D13521

Summary:
This patch is to address @haobo's comments on D13521:
1. rename Table to be TableReader and make its factory function to be GetTableReader
2. move the compression type selection logic out of TableBuilder but to compaction logic
3. more accurate comments
4. Move stat name constants into BlockBasedTable implementation.
5. remove some uncleaned codes in simple_table_db_test

Test Plan: pass test suites.

Reviewers: haobo, dhruba, kailiu

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D13785
This commit is contained in:
Siying Dong
2013-10-30 10:52:33 -07:00
parent 068a819ac9
commit f03b2df010
19 changed files with 349 additions and 654 deletions

View File

@@ -17,7 +17,7 @@
#include "rocksdb/env.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "table/block_based_table.h"
#include "table/block_based_table_reader.h"
#include "table/block_builder.h"
#include "table/filter_block.h"
#include "table/format.h"
@@ -81,8 +81,7 @@ struct BlockBasedTableBuilder::Rep {
BlockBuilder data_block;
BlockBuilder index_block;
std::string last_key;
// Whether enable compression in this table.
bool enable_compression;
CompressionType compression_type;
uint64_t num_entries = 0;
uint64_t num_data_blocks = 0;
@@ -107,13 +106,13 @@ struct BlockBasedTableBuilder::Rep {
std::string compressed_output;
Rep(const Options& opt, WritableFile* f, bool enable_compression)
Rep(const Options& opt, WritableFile* f, CompressionType compression_type)
: options(opt),
index_block_options(opt),
file(f),
data_block(&options),
index_block(1, index_block_options.comparator),
enable_compression(enable_compression),
compression_type(compression_type),
filter_block(opt.filter_policy == nullptr ? nullptr
: new FilterBlockBuilder(opt)),
pending_index_entry(false) {
@@ -121,9 +120,9 @@ struct BlockBasedTableBuilder::Rep {
};
BlockBasedTableBuilder::BlockBasedTableBuilder(const Options& options,
WritableFile* file, int level,
const bool enable_compression)
: TableBuilder(level), rep_(new Rep(options, file, enable_compression)) {
WritableFile* file,
CompressionType compression_type)
: rep_(new Rep(options, file, compression_type)) {
if (rep_->filter_block != nullptr) {
rep_->filter_block->StartBlock(0);
}
@@ -220,26 +219,7 @@ void BlockBasedTableBuilder::WriteBlock(BlockBuilder* block,
Slice block_contents;
std::string* compressed = &r->compressed_output;
CompressionType type;
if (!r->enable_compression) {
// disable compression
type = kNoCompression;
} else {
// If the use has specified a different compression level for each level,
// then pick the compresison for that level.
if (!r->options.compression_per_level.empty()) {
const int n = r->options.compression_per_level.size();
// It is possible for level_ to be -1; in that case, we use level
// 0's compression. This occurs mostly in backwards compatibility
// situations when the builder doesn't know what level the file
// belongs to. Likewise, if level_ is beyond the end of the
// specified compression levels, use the last value.
type = r->options.compression_per_level[std::max(0,
std::min(level_, n))];
} else {
type = r->options.compression;
}
}
CompressionType type = r->compression_type;
switch (type) {
case kNoCompression:
block_contents = raw;
@@ -376,19 +356,21 @@ Status BlockBasedTableBuilder::Finish() {
BytewiseSortedMap stats;
// Add basic stats
AddStats(stats, TableStatsNames::kRawKeySize, r->raw_key_size);
AddStats(stats, TableStatsNames::kRawValueSize, r->raw_value_size);
AddStats(stats, TableStatsNames::kDataSize, r->data_size);
AddStats(stats, BlockBasedTableStatsNames::kRawKeySize, r->raw_key_size);
AddStats(stats, BlockBasedTableStatsNames::kRawValueSize,
r->raw_value_size);
AddStats(stats, BlockBasedTableStatsNames::kDataSize, r->data_size);
AddStats(
stats,
TableStatsNames::kIndexSize,
BlockBasedTableStatsNames::kIndexSize,
r->index_block.CurrentSizeEstimate() + kBlockTrailerSize
);
AddStats(stats, TableStatsNames::kNumEntries, r->num_entries);
AddStats(stats, TableStatsNames::kNumDataBlocks, r->num_data_blocks);
AddStats(stats, BlockBasedTableStatsNames::kNumEntries, r->num_entries);
AddStats(stats, BlockBasedTableStatsNames::kNumDataBlocks,
r->num_data_blocks);
if (r->filter_block != nullptr) {
stats.insert(std::make_pair(
TableStatsNames::kFilterPolicy,
BlockBasedTableStatsNames::kFilterPolicy,
r->options.filter_policy->Name()
));
}

View File

@@ -11,24 +11,22 @@
#include <stdint.h>
#include "rocksdb/options.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
namespace rocksdb {
class BlockBuilder;
class BlockHandle;
class WritableFile;
class TableBuilder;
class BlockBasedTableBuilder : public TableBuilder {
public:
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish(). The output file
// will be part of level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside.
// caller to close the file after calling Finish().
BlockBasedTableBuilder(const Options& options, WritableFile* file,
int level = -1, const bool enable_compression = true);
CompressionType compression_type);
// REQUIRES: Either Finish() or Abandon() has been called.
~BlockBasedTableBuilder();

View File

@@ -13,24 +13,22 @@
#include <memory>
#include <stdint.h>
#include "table/block_based_table_builder.h"
#include "table/block_based_table.h"
#include "table/block_based_table_reader.h"
#include "port/port.h"
namespace rocksdb {
Status BlockBasedTableFactory::OpenTable(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t file_size,
unique_ptr<Table>* table) const {
Status BlockBasedTableFactory::GetTableReader(
const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const {
return BlockBasedTable::Open(options, soptions, std::move(file), file_size,
table);
table_reader);
}
TableBuilder* BlockBasedTableFactory::GetTableBuilder(
const Options& options, WritableFile* file, int level,
const bool enable_compression) const {
return new BlockBasedTableBuilder(options, file, level, enable_compression);
const Options& options, WritableFile* file,
CompressionType compression_type) const {
return new BlockBasedTableBuilder(options, file, compression_type);
}
} // namespace rocksdb

View File

@@ -36,12 +36,13 @@ public:
const char* Name() const override {
return "BlockBasedTable";
}
Status OpenTable(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<Table>* table) const override;
Status GetTableReader(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
int level, const bool enable_compression) const
CompressionType compression_type) const
override;
};

View File

@@ -7,7 +7,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "table/block_based_table.h"
#include "table/block_based_table_reader.h"
#include "db/dbformat.h"
@@ -113,8 +113,8 @@ Status BlockBasedTable::Open(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t size,
unique_ptr<Table>* table) {
table->reset();
unique_ptr<TableReader>* table_reader) {
table_reader->reset();
if (size < Footer::kEncodedLength) {
return Status::InvalidArgument("file is too short to be an sstable");
}
@@ -151,8 +151,8 @@ Status BlockBasedTable::Open(const Options& options,
SetupCacheKeyPrefix(rep);
rep->filter_data = nullptr;
rep->filter = nullptr;
table->reset(new BlockBasedTable(rep));
((BlockBasedTable*) (table->get()))->ReadMeta(footer);
table_reader->reset(new BlockBasedTable(rep));
((BlockBasedTable*) (table_reader->get()))->ReadMeta(footer);
} else {
if (index_block) delete index_block;
}
@@ -275,12 +275,12 @@ Status BlockBasedTable::ReadStats(const Slice& handle_value, Rep* rep) {
auto& table_stats = rep->table_stats;
// All pre-defined stats of type uint64_t
std::unordered_map<std::string, uint64_t*> predefined_uint64_stats = {
{ TableStatsNames::kDataSize, &table_stats.data_size },
{ TableStatsNames::kIndexSize, &table_stats.index_size },
{ TableStatsNames::kRawKeySize, &table_stats.raw_key_size },
{ TableStatsNames::kRawValueSize, &table_stats.raw_value_size },
{ TableStatsNames::kNumDataBlocks, &table_stats.num_data_blocks },
{ TableStatsNames::kNumEntries, &table_stats.num_entries },
{ BlockBasedTableStatsNames::kDataSize, &table_stats.data_size },
{ BlockBasedTableStatsNames::kIndexSize, &table_stats.index_size },
{ BlockBasedTableStatsNames::kRawKeySize, &table_stats.raw_key_size },
{ BlockBasedTableStatsNames::kRawValueSize, &table_stats.raw_value_size },
{ BlockBasedTableStatsNames::kNumDataBlocks, &table_stats.num_data_blocks},
{ BlockBasedTableStatsNames::kNumEntries, &table_stats.num_entries },
};
std::string last_key;
@@ -313,7 +313,7 @@ Status BlockBasedTable::ReadStats(const Slice& handle_value, Rep* rep) {
continue;
}
*(pos->second) = val;
} else if (key == TableStatsNames::kFilterPolicy) {
} else if (key == BlockBasedTableStatsNames::kFilterPolicy) {
table_stats.filter_policy_name = raw_val.ToString();
} else {
// handle user-collected
@@ -464,7 +464,7 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
// we're past end of file
may_match = false;
} else if (ExtractUserKey(iiter->key()).starts_with(
ExtractUserKey(internal_prefix))) {
ExtractUserKey(internal_prefix))) {
// we need to check for this subtle case because our only
// guarantee is that "the key is a string >= last key in that data
// block" according to the doc/table_format.txt spec.
@@ -497,10 +497,6 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
return may_match;
}
Iterator* Table::NewIterator(const ReadOptions& options) {
return nullptr;
}
Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
if (options.prefix) {
InternalKey internal_prefix(*options.prefix, 0, kTypeValue);
@@ -517,21 +513,23 @@ Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
options, rep_->soptions);
}
Status BlockBasedTable::Get(const ReadOptions& options, const Slice& k,
void* arg,
bool (*saver)(void*, const Slice&, const Slice&,
bool),
void (*mark_key_may_exist)(void*)) {
Status BlockBasedTable::Get(
const ReadOptions& readOptions,
const Slice& key,
void* handle_context,
bool (*result_handler)(void* handle_context, const Slice& k,
const Slice& v, bool didIO),
void (*mark_key_may_exist_handler)(void* handle_context)) {
Status s;
Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
bool done = false;
for (iiter->Seek(k); iiter->Valid() && !done; iiter->Next()) {
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
Slice handle_value = iiter->value();
FilterBlockReader* filter = rep_->filter;
BlockHandle handle;
if (filter != nullptr &&
handle.DecodeFrom(&handle_value).ok() &&
!filter->KeyMayMatch(handle.offset(), k)) {
!filter->KeyMayMatch(handle.offset(), key)) {
// Not found
// TODO: think about interaction with Merge. If a user key cannot
// cross one data block, we should be fine.
@@ -540,19 +538,20 @@ Status BlockBasedTable::Get(const ReadOptions& options, const Slice& k,
} else {
bool didIO = false;
std::unique_ptr<Iterator> block_iter(
BlockReader(this, options, iiter->value(), &didIO));
BlockReader(this, readOptions, iiter->value(), &didIO));
if (options.read_tier && block_iter->status().IsIncomplete()) {
if (readOptions.read_tier && block_iter->status().IsIncomplete()) {
// couldn't get block from block_cache
// Update Saver.state to Found because we are only looking for whether
// we can guarantee the key is not there when "no_io" is set
(*mark_key_may_exist)(arg);
(*mark_key_may_exist_handler)(handle_context);
break;
}
// Call the *saver function on each entry/block until it returns false
for (block_iter->Seek(k); block_iter->Valid(); block_iter->Next()) {
if (!(*saver)(arg, block_iter->key(), block_iter->value(), didIO)) {
for (block_iter->Seek(key); block_iter->Valid(); block_iter->Next()) {
if (!(*result_handler)(handle_context, block_iter->key(),
block_iter->value(), didIO)) {
done = true;
break;
}
@@ -611,12 +610,17 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) {
const std::string BlockBasedTable::kFilterBlockPrefix = "filter.";
const std::string BlockBasedTable::kStatsBlock = "rocksdb.stats";
const std::string TableStatsNames::kDataSize = "rocksdb.data.size";
const std::string TableStatsNames::kIndexSize = "rocksdb.index.size";
const std::string TableStatsNames::kRawKeySize = "rocksdb.raw.key.size";
const std::string TableStatsNames::kRawValueSize = "rocksdb.raw.value.size";
const std::string TableStatsNames::kNumDataBlocks = "rocksdb.num.data.blocks";
const std::string TableStatsNames::kNumEntries = "rocksdb.num.entries";
const std::string TableStatsNames::kFilterPolicy = "rocksdb.filter.policy";
const std::string BlockBasedTableStatsNames::kDataSize = "rocksdb.data.size";
const std::string BlockBasedTableStatsNames::kIndexSize = "rocksdb.index.size";
const std::string BlockBasedTableStatsNames::kRawKeySize =
"rocksdb.raw.key.size";
const std::string BlockBasedTableStatsNames::kRawValueSize =
"rocksdb.raw.value.size";
const std::string BlockBasedTableStatsNames::kNumDataBlocks =
"rocksdb.num.data.blocks";
const std::string BlockBasedTableStatsNames::kNumEntries =
"rocksdb.num.entries";
const std::string BlockBasedTableStatsNames::kFilterPolicy =
"rocksdb.filter.policy";
} // namespace rocksdb

View File

@@ -24,14 +24,14 @@ struct Options;
class RandomAccessFile;
struct ReadOptions;
class TableCache;
class Table;
class TableReader;
using std::unique_ptr;
// A Table is a sorted map from strings to strings. Tables are
// immutable and persistent. A Table may be safely accessed from
// multiple threads without external synchronization.
class BlockBasedTable : public Table {
class BlockBasedTable : public TableReader {
public:
static const std::string kFilterBlockPrefix;
static const std::string kStatsBlock;
@@ -40,19 +40,17 @@ class BlockBasedTable : public Table {
// of "file", and read the metadata entries necessary to allow
// retrieving data from the table.
//
// If successful, returns ok and sets "*table" to the newly opened
// table. The client should delete "*table" when no longer needed.
// If there was an error while initializing the table, sets "*table"
// to nullptr and returns a non-ok status. Does not take ownership of
// "*source", but the client must ensure that "source" remains live
// for the duration of the returned table's lifetime.
// If successful, returns ok and sets "*table_reader" to the newly opened
// table. The client should delete "*table_reader" when no longer needed.
// If there was an error while initializing the table, sets "*table_reader"
// to nullptr and returns a non-ok status.
//
// *file must remain live while this Table is in use.
static Status Open(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
unique_ptr<Table>* table);
unique_ptr<TableReader>* table_reader);
bool PrefixMayMatch(const Slice& internal_prefix) override;
@@ -62,10 +60,13 @@ class BlockBasedTable : public Table {
Iterator* NewIterator(const ReadOptions&) override;
Status Get(
const ReadOptions&, const Slice& key,
void* arg,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
void (*mark_key_may_exist)(void*) = nullptr) override;
const ReadOptions& readOptions,
const Slice& key,
void* handle_context,
bool (*result_handler)(void* handle_context, const Slice& k,
const Slice& v, bool didIO),
void (*mark_key_may_exist_handler)(void* handle_context) = nullptr)
override;
// Given a key, return an approximate byte offset in the file where
// the data for that key begins (or would begin if the key were
@@ -115,8 +116,18 @@ class BlockBasedTable : public Table {
}
// No copying allowed
explicit BlockBasedTable(const Table&) = delete;
void operator=(const Table&) = delete;
explicit BlockBasedTable(const TableReader&) = delete;
void operator=(const TableReader&) = delete;
};
struct BlockBasedTableStatsNames {
static const std::string kDataSize;
static const std::string kIndexSize;
static const std::string kRawKeySize;
static const std::string kRawValueSize;
static const std::string kNumDataBlocks;
static const std::string kNumEntries;
static const std::string kFilterPolicy;
};
} // namespace rocksdb

View File

@@ -22,7 +22,7 @@
#include "table/block.h"
#include "table/block_builder.h"
#include "table/format.h"
#include "table/block_based_table.h"
#include "table/block_based_table_reader.h"
#include "table/block_based_table_builder.h"
#include "util/random.h"
#include "util/testharness.h"
@@ -250,7 +250,7 @@ class BlockBasedTableConstructor: public Constructor {
virtual Status FinishImpl(const Options& options, const KVMap& data) {
Reset();
sink_.reset(new StringSink());
BlockBasedTableBuilder builder(options, sink_.get());
BlockBasedTableBuilder builder(options, sink_.get(), options.compression);
for (KVMap::const_iterator it = data.begin();
it != data.end();
@@ -267,36 +267,36 @@ class BlockBasedTableConstructor: public Constructor {
uniq_id_ = cur_uniq_id_++;
source_.reset(new StringSource(sink_->contents(), uniq_id_));
unique_ptr<TableFactory> table_factory;
return options.table_factory->OpenTable(options, soptions,
std::move(source_),
sink_->contents().size(),
&table_);
return options.table_factory->GetTableReader(options, soptions,
std::move(source_),
sink_->contents().size(),
&table_reader_);
}
virtual Iterator* NewIterator() const {
return table_->NewIterator(ReadOptions());
return table_reader_->NewIterator(ReadOptions());
}
uint64_t ApproximateOffsetOf(const Slice& key) const {
return table_->ApproximateOffsetOf(key);
return table_reader_->ApproximateOffsetOf(key);
}
virtual Status Reopen(const Options& options) {
source_.reset(new StringSource(sink_->contents(), uniq_id_));
return options.table_factory->OpenTable(options, soptions,
std::move(source_),
sink_->contents().size(),
&table_);
return options.table_factory->GetTableReader(options, soptions,
std::move(source_),
sink_->contents().size(),
&table_reader_);
}
virtual Table* table() {
return table_.get();
virtual TableReader* table_reader() {
return table_reader_.get();
}
private:
void Reset() {
uniq_id_ = 0;
table_.reset();
table_reader_.reset();
sink_.reset();
source_.reset();
}
@@ -304,7 +304,7 @@ class BlockBasedTableConstructor: public Constructor {
uint64_t uniq_id_;
unique_ptr<StringSink> sink_;
unique_ptr<StringSource> source_;
unique_ptr<Table> table_;
unique_ptr<TableReader> table_reader_;
BlockBasedTableConstructor();
@@ -883,7 +883,7 @@ TEST(TableTest, BasicTableStats) {
c.Finish(options, &keys, &kvmap);
auto& stats = c.table()->GetTableStats();
auto& stats = c.table_reader()->GetTableStats();
ASSERT_EQ(kvmap.size(), stats.num_entries);
auto raw_key_size = kvmap.size() * 2ul;
@@ -918,7 +918,7 @@ TEST(TableTest, FilterPolicyNameStats) {
options.filter_policy = filter_policy.get();
c.Finish(options, &keys, &kvmap);
auto& stats = c.table()->GetTableStats();
auto& stats = c.table_reader()->GetTableStats();
ASSERT_EQ("rocksdb.BuiltinBloomFilter", stats.filter_policy_name);
}
@@ -960,7 +960,7 @@ TEST(TableTest, IndexSizeStat) {
c.Finish(options, &ks, &kvmap);
auto index_size =
c.table()->GetTableStats().index_size;
c.table_reader()->GetTableStats().index_size;
ASSERT_GT(index_size, last_index_size);
last_index_size = index_size;
}
@@ -985,7 +985,7 @@ TEST(TableTest, NumBlockStat) {
c.Finish(options, &ks, &kvmap);
ASSERT_EQ(
kvmap.size(),
c.table()->GetTableStats().num_data_blocks
c.table_reader()->GetTableStats().num_data_blocks
);
}
@@ -1100,7 +1100,7 @@ TEST(TableTest, BlockCacheLeak) {
ASSERT_OK(c.Reopen(opt));
for (const std::string& key: keys) {
ASSERT_TRUE(c.table()->TEST_KeyInCache(ReadOptions(), key));
ASSERT_TRUE(c.table_reader()->TEST_KeyInCache(ReadOptions(), key));
}
}