Merge branch 'master' of github.com:facebook/rocksdb into inplace

Naman Gupta committed on 2013-10-31 11:51:03 -07:00
31 changed files with 1418 additions and 313 deletions


@@ -15,12 +15,23 @@
#include "db/table_cache.h"
#include "db/version_edit.h"
#include "rocksdb/db.h"
#include "rocksdb/table.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "table/block_based_table_builder.h"
#include "util/stop_watch.h"
namespace rocksdb {
class TableFactory;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
CompressionType compression_type) {
return options.table_factory->GetTableBuilder(options, file,
compression_type);
}
Status BuildTable(const std::string& dbname,
Env* env,
const Options& options,
@@ -52,8 +63,9 @@ Status BuildTable(const std::string& dbname,
if (!s.ok()) {
return s;
}
TableBuilder* builder = new TableBuilder(options, file.get(), 0,
enable_compression);
TableBuilder* builder = GetTableBuilder(options, file.get(),
options.compression);
// the first key is the smallest key
Slice key = iter->key();


@@ -9,6 +9,7 @@
#include "rocksdb/comparator.h"
#include "rocksdb/status.h"
#include "rocksdb/types.h"
#include "rocksdb/options.h"
namespace rocksdb {
@@ -20,6 +21,12 @@ class EnvOptions;
class Iterator;
class TableCache;
class VersionEdit;
class TableBuilder;
class WritableFile;
extern TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
CompressionType compression_type);
// Build a Table file from the contents of *iter. The generated file
// will be named according to meta->number. On success, the rest of


@@ -15,12 +15,12 @@
#include <sys/types.h>
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
#include "rocksdb/table.h"
#include "rocksdb/write_batch.h"
#include "db/db_impl.h"
#include "db/filename.h"
#include "db/log_format.h"
#include "db/version_set.h"
#include "table/table.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"


@@ -41,10 +41,10 @@
#include "rocksdb/merge_operator.h"
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"
#include "rocksdb/table_builder.h"
#include "rocksdb/table.h"
#include "port/port.h"
#include "table/block.h"
#include "table/merger.h"
#include "table/table.h"
#include "table/two_level_iterator.h"
#include "util/auto_roll_logger.h"
#include "util/build_version.h"
@@ -211,6 +211,27 @@ Options SanitizeOptions(const std::string& dbname,
return result;
}
CompressionType GetCompressionType(const Options& options, int level,
const bool enable_compression) {
if (!enable_compression) {
// disable compression
return kNoCompression;
}
// If the user has specified a different compression level for each level,
// then pick the compression for that level.
if (!options.compression_per_level.empty()) {
const int n = options.compression_per_level.size() - 1;
// It is possible for level to be -1; in that case, we use level
// 0's compression. This occurs mostly in backwards compatibility
// situations when the builder doesn't know what level the file
// belongs to. Likewise, if level is beyond the end of the
// specified compression levels, use the last value.
return options.compression_per_level[std::max(0, std::min(level, n))];
} else {
return options.compression;
}
}
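// Usage sketch (illustrative only, not part of this commit): with a two-entry
// compression_per_level, an unknown level (-1) clamps to entry 0, a level past
// the end reuses the last entry, and enable_compression == false overrides
// everything. The helper name below is hypothetical.
void GetCompressionTypeSketch() {
  Options opts;
  opts.compression_per_level.push_back(kNoCompression);      // level 0
  opts.compression_per_level.push_back(kSnappyCompression);  // level 1 and beyond
  CompressionType a = GetCompressionType(opts, -1, true);   // kNoCompression (clamped to level 0)
  CompressionType b = GetCompressionType(opts, 5, true);    // kSnappyCompression (clamped to last entry)
  CompressionType c = GetCompressionType(opts, 5, false);   // kNoCompression (compression disabled)
  (void) a; (void) b; (void) c;
}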
DBImpl::DBImpl(const Options& options, const std::string& dbname)
: env_(options.env),
dbname_(dbname),
@@ -1774,9 +1795,12 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
compact->outfile->SetPreallocationBlockSize(
1.1 * versions_->MaxFileSizeForLevel(compact->compaction->output_level()));
compact->builder.reset(new TableBuilder(options_, compact->outfile.get(),
compact->compaction->output_level(),
compact->compaction->enable_compression()));
CompressionType compression_type = GetCompressionType(
options_, compact->compaction->output_level(),
compact->compaction->enable_compression());
compact->builder.reset(
GetTableBuilder(options_, compact->outfile.get(), compression_type));
}
return s;
}
@@ -2026,9 +2050,9 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
compaction_filter_value.clear();
bool to_delete =
compaction_filter->Filter(compact->compaction->level(),
ikey.user_key, value,
&compaction_filter_value,
&value_changed);
ikey.user_key, value,
&compaction_filter_value,
&value_changed);
if (to_delete) {
// make a copy of the original key
delete_key.assign(key.data(), key.data() + key.size());


@@ -444,4 +444,13 @@ extern Options SanitizeOptions(const std::string& db,
const InternalFilterPolicy* ipolicy,
const Options& src);
// Determine compression type, based on user options, level of the output
// file and whether compression is disabled.
// If enable_compression is false, then compression is always disabled no
// matter what the values of the other two parameters are.
// Otherwise, the compression type is determined based on options and level.
CompressionType GetCompressionType(const Options& options, int level,
const bool enable_compression);
} // namespace rocksdb


@@ -29,11 +29,10 @@
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/status.h"
#include "rocksdb/table_builder.h"
#include "rocksdb/table.h"
#include "port/port.h"
#include "table/block.h"
#include "table/merger.h"
#include "table/table.h"
#include "table/two_level_iterator.h"
#include "util/coding.h"
#include "util/logging.h"


@@ -20,7 +20,7 @@
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/env.h"
#include "table/table.h"
#include "rocksdb/table.h"
#include "util/hash.h"
#include "util/logging.h"
#include "util/mutexlock.h"


@@ -13,7 +13,7 @@
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice.h"
#include "rocksdb/table_builder.h"
#include "rocksdb/table.h"
#include "rocksdb/types.h"
#include "util/coding.h"
#include "util/logging.h"

db/simple_table_db_test.cc (new file)

@@ -0,0 +1,793 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <algorithm>
#include <set>
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "db/db_impl.h"
#include "db/filename.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "db/db_statistics.h"
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/env.h"
#include "rocksdb/table.h"
#include "util/hash.h"
#include "util/logging.h"
#include "util/mutexlock.h"
#include "util/testharness.h"
#include "util/testutil.h"
#include "utilities/merge_operators.h"
using std::unique_ptr;
namespace rocksdb {
// SimpleTable is a simple table format for UNIT TEST ONLY. It is not built
// as production quality.
// SimpleTable requires the input key size to be a fixed 16 bytes; values cannot
// be longer than 150000 bytes, and data is stored on disk in this format:
// +--------------------------------------------+ <= key1 offset
// | key1 | value_size (4 bytes) | |
// +----------------------------------------+ |
// | value1 |
// | |
// +----------------------------------------+---+ <= key2 offset
// | key2 | value_size (4 bytes) | |
// +----------------------------------------+ |
// | value2 |
// | |
// | ...... |
// +-----------------+--------------------------+ <= index_block_offset
// | key1 | key1 offset (8 bytes) |
// +-----------------+--------------------------+
// | key2 | key2 offset (8 bytes) |
// +-----------------+--------------------------+
// | key3 | key3 offset (8 bytes) |
// +-----------------+--------------------------+
// | ...... |
// +-----------------+------------+-------------+
// | index_block_offset (8 bytes) |
// +------------------------------+
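// Worked example (illustrative only, not part of this commit): the offset
// arithmetic implied by the diagram above, using the format's fixed sizes
// (16-byte user key + 8-byte key footer, 4-byte value size). The names in this
// sketch are hypothetical, and it assumes <string>, <vector> and fixed-width
// integer types are available via the includes above.
namespace simple_table_layout_sketch {
// Returns the index_block_offset for the given values and fills in the
// per-key offsets that the index block would record.
inline uint64_t ComputeOffsets(const std::vector<std::string>& values,
                               std::vector<uint64_t>* key_offsets) {
  const uint64_t kInternalKeyLen = 16 + 8;     // user_key_size + key_footer_len
  uint64_t offset = 0;
  for (size_t i = 0; i < values.size(); i++) {
    key_offsets->push_back(offset);                     // "keyN offset" in the diagram
    offset += kInternalKeyLen + 4 + values[i].size();   // key + value_size + value
  }
  return offset;                               // stored in the 8-byte footer
}
}  // namespace simple_table_layout_sketch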
class SimpleTableReader: public TableReader {
public:
// Attempt to open the table that is stored in bytes [0..file_size)
// of "file", and read the metadata entries necessary to allow
// retrieving data from the table.
//
// If successful, returns ok and sets "*table" to the newly opened
// table. The client should delete "*table" when no longer needed.
// If there was an error while initializing the table, sets "*table"
// to nullptr and returns a non-ok status. Does not take ownership of
// "*source", but the client must ensure that "source" remains live
// for the duration of the returned table's lifetime.
//
// *file must remain live while this Table is in use.
static Status Open(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader);
bool PrefixMayMatch(const Slice& internal_prefix) override;
Iterator* NewIterator(const ReadOptions&) override;
Status Get(
const ReadOptions&, const Slice& key, void* arg,
bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool),
void (*mark_key_may_exist)(void*) = nullptr) override;
uint64_t ApproximateOffsetOf(const Slice& key) override;
bool TEST_KeyInCache(const ReadOptions& options, const Slice& key) override;
void SetupForCompaction() override;
TableStats& GetTableStats() override;
~SimpleTableReader();
private:
struct Rep;
Rep* rep_;
explicit SimpleTableReader(Rep* rep) {
rep_ = rep;
}
friend class TableCache;
friend class SimpleTableIterator;
Status GetOffset(const Slice& target, uint64_t* offset);
// No copying allowed
explicit SimpleTableReader(const TableReader&) = delete;
void operator=(const TableReader&) = delete;
};
// Iterator to iterate SimpleTable
class SimpleTableIterator: public Iterator {
public:
explicit SimpleTableIterator(SimpleTableReader* table);
~SimpleTableIterator();
bool Valid() const;
void SeekToFirst();
void SeekToLast();
void Seek(const Slice& target);
void Next();
void Prev();
Slice key() const;
Slice value() const;
Status status() const;
private:
SimpleTableReader* table_;
uint64_t offset_;
uint64_t next_offset_;
Slice key_;
Slice value_;
char tmp_str_[4];
char* key_str_;
char* value_str_;
int value_str_len_;
Status status_;
// No copying allowed
SimpleTableIterator(const SimpleTableIterator&) = delete;
void operator=(const Iterator&) = delete;
};
struct SimpleTableReader::Rep {
~Rep() {
}
Rep(const EnvOptions& storage_options, uint64_t index_start_offset,
int num_entries) :
soptions(storage_options), index_start_offset(index_start_offset),
num_entries(num_entries) {
}
Options options;
const EnvOptions& soptions;
Status status;
unique_ptr<RandomAccessFile> file;
uint64_t index_start_offset;
int num_entries;
TableStats table_stats;
const static int user_key_size = 16;
const static int offset_length = 8;
const static int key_footer_len = 8;
static int GetInternalKeyLength() {
return user_key_size + key_footer_len;
}
};
SimpleTableReader::~SimpleTableReader() {
delete rep_;
}
Status SimpleTableReader::Open(const Options& options,
const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t size,
unique_ptr<TableReader>* table_reader) {
char footer_space[Rep::offset_length];
Slice footer_input;
Status s = file->Read(size - Rep::offset_length, Rep::offset_length,
&footer_input, footer_space);
if (s.ok()) {
uint64_t index_start_offset = DecodeFixed64(footer_space);
int num_entries = (size - Rep::offset_length - index_start_offset)
/ (Rep::GetInternalKeyLength() + Rep::offset_length);
SimpleTableReader::Rep* rep = new SimpleTableReader::Rep(soptions,
index_start_offset,
num_entries);
rep->file = std::move(file);
rep->options = options;
table_reader->reset(new SimpleTableReader(rep));
}
return s;
}
void SimpleTableReader::SetupForCompaction() {
}
TableStats& SimpleTableReader::GetTableStats() {
return rep_->table_stats;
}
bool SimpleTableReader::PrefixMayMatch(const Slice& internal_prefix) {
return true;
}
Iterator* SimpleTableReader::NewIterator(const ReadOptions& options) {
return new SimpleTableIterator(this);
}
Status SimpleTableReader::GetOffset(const Slice& target, uint64_t* offset) {
uint32_t left = 0;
uint32_t right = rep_->num_entries - 1;
char key_chars[Rep::GetInternalKeyLength()];
Slice tmp_slice;
uint32_t target_offset = 0;
while (left <= right) {
uint32_t mid = (left + right + 1) / 2;
uint64_t offset_to_read = rep_->index_start_offset
+ (Rep::GetInternalKeyLength() + Rep::offset_length) * mid;
Status s = rep_->file->Read(offset_to_read, Rep::GetInternalKeyLength(),
&tmp_slice, key_chars);
if (!s.ok()) {
return s;
}
int compare_result = rep_->options.comparator->Compare(tmp_slice, target);
if (compare_result < 0) {
if (left == right) {
target_offset = right + 1;
break;
}
left = mid;
} else {
if (left == right) {
target_offset = left;
break;
}
right = mid - 1;
}
}
if (target_offset >= (uint32_t) rep_->num_entries) {
*offset = rep_->index_start_offset;
return Status::OK();
}
char value_offset_chars[Rep::offset_length];
int64_t offset_for_value_offset = rep_->index_start_offset
+ (Rep::GetInternalKeyLength() + Rep::offset_length) * target_offset
+ Rep::GetInternalKeyLength();
Status s = rep_->file->Read(offset_for_value_offset, Rep::offset_length,
&tmp_slice, value_offset_chars);
if (s.ok()) {
*offset = DecodeFixed64(value_offset_chars);
}
return s;
}
Status SimpleTableReader::Get(
const ReadOptions& options, const Slice& k, void* arg,
bool (*saver)(void*, const Slice&, const Slice&, bool),
void (*mark_key_may_exist)(void*)) {
Status s;
SimpleTableIterator* iter = new SimpleTableIterator(this);
for (iter->Seek(k); iter->Valid(); iter->Next()) {
if (!(*saver)(arg, iter->key(), iter->value(), true)) {
break;
}
}
s = iter->status();
delete iter;
return s;
}
bool SimpleTableReader::TEST_KeyInCache(const ReadOptions& options,
const Slice& key) {
return false;
}
uint64_t SimpleTableReader::ApproximateOffsetOf(const Slice& key) {
return 0;
}
SimpleTableIterator::SimpleTableIterator(SimpleTableReader* table) :
table_(table) {
key_str_ = new char[SimpleTableReader::Rep::GetInternalKeyLength()];
value_str_len_ = -1;
SeekToFirst();
}
SimpleTableIterator::~SimpleTableIterator() {
delete[] key_str_;
if (value_str_len_ >= 0) {
delete[] value_str_;
}
}
bool SimpleTableIterator::Valid() const {
return offset_ < table_->rep_->index_start_offset && offset_ >= 0;
}
void SimpleTableIterator::SeekToFirst() {
next_offset_ = 0;
Next();
}
void SimpleTableIterator::SeekToLast() {
assert(false);
}
void SimpleTableIterator::Seek(const Slice& target) {
Status s = table_->GetOffset(target, &next_offset_);
if (!s.ok()) {
status_ = s;
}
Next();
}
void SimpleTableIterator::Next() {
offset_ = next_offset_;
if (offset_ >= table_->rep_->index_start_offset) {
return;
}
Slice result;
int internal_key_size = SimpleTableReader::Rep::GetInternalKeyLength();
Status s = table_->rep_->file->Read(next_offset_, internal_key_size, &result,
key_str_);
next_offset_ += internal_key_size;
key_ = result;
Slice value_size_slice;
s = table_->rep_->file->Read(next_offset_, 4, &value_size_slice, tmp_str_);
next_offset_ += 4;
uint32_t value_size = DecodeFixed32(tmp_str_);
Slice value_slice;
if ((int) value_size > value_str_len_) {
if (value_str_len_ >= 0) {
delete[] value_str_;
}
value_str_ = new char[value_size];
value_str_len_ = value_size;
}
s = table_->rep_->file->Read(next_offset_, value_size, &value_slice,
value_str_);
next_offset_ += value_size;
value_ = value_slice;
}
void SimpleTableIterator::Prev() {
assert(false);
}
Slice SimpleTableIterator::key() const {
Log(table_->rep_->options.info_log, "key!!!!");
return key_;
}
Slice SimpleTableIterator::value() const {
return value_;
}
Status SimpleTableIterator::status() const {
return status_;
}
class SimpleTableBuilder: public TableBuilder {
public:
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish(). The output file
// will be part of the level specified by 'level'. A value of -1 means
// that the caller does not know which level the output file will reside in.
SimpleTableBuilder(const Options& options, WritableFile* file,
CompressionType compression_type);
// REQUIRES: Either Finish() or Abandon() has been called.
~SimpleTableBuilder();
// Add key,value to the table being constructed.
// REQUIRES: key is after any previously added key according to comparator.
// REQUIRES: Finish(), Abandon() have not been called
void Add(const Slice& key, const Slice& value) override;
// Return non-ok iff some error has been detected.
Status status() const override;
// Finish building the table. Stops using the file passed to the
// constructor after this function returns.
// REQUIRES: Finish(), Abandon() have not been called
Status Finish() override;
// Indicate that the contents of this builder should be abandoned. Stops
// using the file passed to the constructor after this function returns.
// If the caller is not going to call Finish(), it must call Abandon()
// before destroying this builder.
// REQUIRES: Finish(), Abandon() have not been called
void Abandon() override;
// Number of calls to Add() so far.
uint64_t NumEntries() const override;
// Size of the file generated so far. If invoked after a successful
// Finish() call, returns the size of the final generated file.
uint64_t FileSize() const override;
private:
struct Rep;
Rep* rep_;
// No copying allowed
SimpleTableBuilder(const SimpleTableBuilder&) = delete;
void operator=(const SimpleTableBuilder&) = delete;
};
struct SimpleTableBuilder::Rep {
Options options;
WritableFile* file;
uint64_t offset = 0;
Status status;
uint64_t num_entries = 0;
bool closed = false; // Either Finish() or Abandon() has been called.
const static int user_key_size = 16;
const static int offset_length = 8;
const static int key_footer_len = 8;
static int GetInternalKeyLength() {
return user_key_size + key_footer_len;
}
std::string index;
Rep(const Options& opt, WritableFile* f) :
options(opt), file(f) {
}
~Rep() {
}
};
SimpleTableBuilder::SimpleTableBuilder(const Options& options,
WritableFile* file,
CompressionType compression_type) :
rep_(new SimpleTableBuilder::Rep(options, file)) {
}
SimpleTableBuilder::~SimpleTableBuilder() {
delete (rep_);
}
void SimpleTableBuilder::Add(const Slice& key, const Slice& value) {
assert((int ) key.size() == Rep::GetInternalKeyLength());
// Update index
rep_->index.append(key.data(), key.size());
PutFixed64(&(rep_->index), rep_->offset);
// Write key-value pair
rep_->file->Append(key);
rep_->offset += Rep::GetInternalKeyLength();
std::string size;
int value_size = value.size();
PutFixed32(&size, value_size);
Slice sizeSlice(size);
rep_->file->Append(sizeSlice);
rep_->file->Append(value);
rep_->offset += value_size + 4;
rep_->num_entries++;
}
Status SimpleTableBuilder::status() const {
return Status::OK();
}
Status SimpleTableBuilder::Finish() {
Rep* r = rep_;
assert(!r->closed);
r->closed = true;
uint64_t index_offset = rep_->offset;
Slice index_slice(rep_->index);
rep_->file->Append(index_slice);
rep_->offset += index_slice.size();
std::string index_offset_str;
PutFixed64(&index_offset_str, index_offset);
Slice foot_slice(index_offset_str);
rep_->file->Append(foot_slice);
rep_->offset += foot_slice.size();
return Status::OK();
}
void SimpleTableBuilder::Abandon() {
rep_->closed = true;
}
uint64_t SimpleTableBuilder::NumEntries() const {
return rep_->num_entries;
}
uint64_t SimpleTableBuilder::FileSize() const {
return rep_->offset;
}
class SimpleTableFactory: public TableFactory {
public:
~SimpleTableFactory() {
}
SimpleTableFactory() {
}
const char* Name() const override {
return "SimpleTable";
}
Status GetTableReader(const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file,
uint64_t file_size,
unique_ptr<TableReader>* table_reader) const;
TableBuilder* GetTableBuilder(const Options& options, WritableFile* file,
CompressionType compression_type) const;
};
Status SimpleTableFactory::GetTableReader(
const Options& options, const EnvOptions& soptions,
unique_ptr<RandomAccessFile> && file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const {
return SimpleTableReader::Open(options, soptions, std::move(file), file_size,
table_reader);
}
TableBuilder* SimpleTableFactory::GetTableBuilder(
const Options& options, WritableFile* file,
CompressionType compression_type) const {
return new SimpleTableBuilder(options, file, compression_type);
}
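// Wiring sketch (illustrative only, not part of this commit): a factory like
// this is installed through Options::table_factory; flushes and compactions
// then call its GetTableBuilder() and TableCache calls its GetTableReader().
// The test fixture's CurrentOptions() below does the same thing;
// OpenWithSimpleTable is a hypothetical helper.
inline Status OpenWithSimpleTable(const std::string& dbname, DB** db) {
  Options options;
  options.create_if_missing = true;
  options.table_factory.reset(new SimpleTableFactory());
  return DB::Open(options, dbname, db);
}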
class SimpleTableDBTest {
protected:
public:
std::string dbname_;
Env* env_;
DB* db_;
Options last_options_;
SimpleTableDBTest() :
env_(Env::Default()) {
dbname_ = test::TmpDir() + "/simple_table_db_test";
ASSERT_OK(DestroyDB(dbname_, Options()));
db_ = nullptr;
Reopen();
}
~SimpleTableDBTest() {
delete db_;
ASSERT_OK(DestroyDB(dbname_, Options()));
}
// Return the current option configuration.
Options CurrentOptions() {
Options options;
options.table_factory.reset(new SimpleTableFactory());
return options;
}
DBImpl* dbfull() {
return reinterpret_cast<DBImpl*>(db_);
}
void Reopen(Options* options = nullptr) {
ASSERT_OK(TryReopen(options));
}
void Close() {
delete db_;
db_ = nullptr;
}
void DestroyAndReopen(Options* options = nullptr) {
//Destroy using last options
Destroy(&last_options_);
ASSERT_OK(TryReopen(options));
}
void Destroy(Options* options) {
delete db_;
db_ = nullptr;
ASSERT_OK(DestroyDB(dbname_, *options));
}
Status PureReopen(Options* options, DB** db) {
return DB::Open(*options, dbname_, db);
}
Status TryReopen(Options* options = nullptr) {
delete db_;
db_ = nullptr;
Options opts;
if (options != nullptr) {
opts = *options;
} else {
opts = CurrentOptions();
opts.create_if_missing = true;
}
last_options_ = opts;
return DB::Open(opts, dbname_, &db_);
}
Status Put(const Slice& k, const Slice& v) {
return db_->Put(WriteOptions(), k, v);
}
Status Delete(const std::string& k) {
return db_->Delete(WriteOptions(), k);
}
std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
ReadOptions options;
options.snapshot = snapshot;
std::string result;
Status s = db_->Get(options, k, &result);
if (s.IsNotFound()) {
result = "NOT_FOUND";
} else if (!s.ok()) {
result = s.ToString();
}
return result;
}
int NumTableFilesAtLevel(int level) {
std::string property;
ASSERT_TRUE(
db_->GetProperty("rocksdb.num-files-at-level" + NumberToString(level),
&property));
return atoi(property.c_str());
}
// Return spread of files per level
std::string FilesPerLevel() {
std::string result;
int last_non_zero_offset = 0;
for (int level = 0; level < db_->NumberLevels(); level++) {
int f = NumTableFilesAtLevel(level);
char buf[100];
snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
result += buf;
if (f > 0) {
last_non_zero_offset = result.size();
}
}
result.resize(last_non_zero_offset);
return result;
}
std::string IterStatus(Iterator* iter) {
std::string result;
if (iter->Valid()) {
result = iter->key().ToString() + "->" + iter->value().ToString();
} else {
result = "(invalid)";
}
return result;
}
};
TEST(SimpleTableDBTest, Empty) {
ASSERT_TRUE(db_ != nullptr);
ASSERT_EQ("NOT_FOUND", Get("0000000000000foo"));
}
TEST(SimpleTableDBTest, ReadWrite) {
ASSERT_OK(Put("0000000000000foo", "v1"));
ASSERT_EQ("v1", Get("0000000000000foo"));
ASSERT_OK(Put("0000000000000bar", "v2"));
ASSERT_OK(Put("0000000000000foo", "v3"));
ASSERT_EQ("v3", Get("0000000000000foo"));
ASSERT_EQ("v2", Get("0000000000000bar"));
}
TEST(SimpleTableDBTest, Flush) {
ASSERT_OK(Put("0000000000000foo", "v1"));
ASSERT_OK(Put("0000000000000bar", "v2"));
ASSERT_OK(Put("0000000000000foo", "v3"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v3", Get("0000000000000foo"));
ASSERT_EQ("v2", Get("0000000000000bar"));
}
TEST(SimpleTableDBTest, Flush2) {
ASSERT_OK(Put("0000000000000bar", "b"));
ASSERT_OK(Put("0000000000000foo", "v1"));
dbfull()->TEST_FlushMemTable();
ASSERT_OK(Put("0000000000000foo", "v2"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v2", Get("0000000000000foo"));
ASSERT_OK(Put("0000000000000eee", "v3"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v3", Get("0000000000000eee"));
ASSERT_OK(Delete("0000000000000bar"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("NOT_FOUND", Get("0000000000000bar"));
ASSERT_OK(Put("0000000000000eee", "v5"));
dbfull()->TEST_FlushMemTable();
ASSERT_EQ("v5", Get("0000000000000eee"));
}
static std::string Key(int i) {
char buf[100];
snprintf(buf, sizeof(buf), "key_______%06d", i);
return std::string(buf);
}
static std::string RandomString(Random* rnd, int len) {
std::string r;
test::RandomString(rnd, len, &r);
return r;
}
TEST(SimpleTableDBTest, CompactionTrigger) {
Options options = CurrentOptions();
options.write_buffer_size = 100 << 10; //100KB
options.num_levels = 3;
options.max_mem_compaction_level = 0;
options.level0_file_num_compaction_trigger = 3;
Reopen(&options);
Random rnd(301);
for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
num++) {
std::vector<std::string> values;
// Write 120KB (12 values, each 10K)
for (int i = 0; i < 12; i++) {
values.push_back(RandomString(&rnd, 10000));
ASSERT_OK(Put(Key(i), values[i]));
}
dbfull()->TEST_WaitForFlushMemTable();
ASSERT_EQ(NumTableFilesAtLevel(0), num + 1);
}
// Generate one more file in level-0; this should trigger a level-0 compaction
std::vector<std::string> values;
for (int i = 0; i < 12; i++) {
values.push_back(RandomString(&rnd, 10000));
ASSERT_OK(Put(Key(i), values[i]));
}
dbfull()->TEST_WaitForCompact();
ASSERT_EQ(NumTableFilesAtLevel(0), 0);
ASSERT_EQ(NumTableFilesAtLevel(1), 1);
}
} // namespace rocksdb
int main(int argc, char** argv) {
return rocksdb::test::RunAllTests();
}


@@ -12,15 +12,15 @@
#include "db/filename.h"
#include "rocksdb/statistics.h"
#include "table/table.h"
#include "rocksdb/table.h"
#include "util/coding.h"
#include "util/stop_watch.h"
namespace rocksdb {
static void DeleteEntry(const Slice& key, void* value) {
Table* table = reinterpret_cast<Table*>(value);
delete table;
TableReader* table_reader = reinterpret_cast<TableReader*>(value);
delete table_reader;
}
static void UnrefEntry(void* arg1, void* arg2) {
@@ -63,7 +63,7 @@ Status TableCache::FindTable(const EnvOptions& toptions,
}
std::string fname = TableFileName(dbname_, file_number);
unique_ptr<RandomAccessFile> file;
unique_ptr<Table> table;
unique_ptr<TableReader> table_reader;
s = env_->NewRandomAccessFile(fname, &file, toptions);
RecordTick(options_->statistics, NO_FILE_OPENS);
if (s.ok()) {
@@ -71,17 +71,19 @@ Status TableCache::FindTable(const EnvOptions& toptions,
file->Hint(RandomAccessFile::RANDOM);
}
StopWatch sw(env_, options_->statistics, TABLE_OPEN_IO_MICROS);
s = Table::Open(*options_, toptions, std::move(file), file_size, &table);
s = options_->table_factory->GetTableReader(*options_, toptions,
std::move(file), file_size,
&table_reader);
}
if (!s.ok()) {
assert(table == nullptr);
assert(table_reader == nullptr);
RecordTick(options_->statistics, NO_FILE_ERRORS);
// We do not cache error results so that if the error is transient,
// or somebody repairs the file, we recover automatically.
} else {
assert(file.get() == nullptr);
*handle = cache_->Insert(key, table.release(), 1, &DeleteEntry);
*handle = cache_->Insert(key, table_reader.release(), 1, &DeleteEntry);
}
}
return s;
@@ -91,10 +93,10 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
const EnvOptions& toptions,
uint64_t file_number,
uint64_t file_size,
Table** tableptr,
TableReader** table_reader_ptr,
bool for_compaction) {
if (tableptr != nullptr) {
*tableptr = nullptr;
if (table_reader_ptr != nullptr) {
*table_reader_ptr = nullptr;
}
Cache::Handle* handle = nullptr;
@@ -104,16 +106,16 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
return NewErrorIterator(s);
}
Table* table =
reinterpret_cast<Table*>(cache_->Value(handle));
Iterator* result = table->NewIterator(options);
TableReader* table_reader =
reinterpret_cast<TableReader*>(cache_->Value(handle));
Iterator* result = table_reader->NewIterator(options);
result->RegisterCleanup(&UnrefEntry, cache_.get(), handle);
if (tableptr != nullptr) {
*tableptr = table;
if (table_reader_ptr != nullptr) {
*table_reader_ptr = table_reader;
}
if (for_compaction) {
table->SetupForCompaction();
table_reader->SetupForCompaction();
}
return result;
@@ -132,9 +134,9 @@ Status TableCache::Get(const ReadOptions& options,
&handle, table_io,
options.read_tier == kBlockCacheTier);
if (s.ok()) {
Table* t =
reinterpret_cast<Table*>(cache_->Value(handle));
s = t->InternalGet(options, k, arg, saver, mark_key_may_exist);
TableReader* t =
reinterpret_cast<TableReader*>(cache_->Value(handle));
s = t->Get(options, k, arg, saver, mark_key_may_exist);
cache_->Release(handle);
} else if (options.read_tier && s.IsIncomplete()) {
// Couldn't find Table in cache but treat as kFound if no_io is set
@@ -154,8 +156,8 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options,
file_size, &handle, table_io);
bool may_match = true;
if (s.ok()) {
Table* t =
reinterpret_cast<Table*>(cache_->Value(handle));
TableReader* t =
reinterpret_cast<TableReader*>(cache_->Value(handle));
may_match = t->PrefixMayMatch(internal_prefix);
cache_->Release(handle);
}


@@ -16,7 +16,7 @@
#include "rocksdb/env.h"
#include "rocksdb/cache.h"
#include "port/port.h"
#include "table/table.h"
#include "rocksdb/table.h"
namespace rocksdb {
@@ -39,7 +39,7 @@ class TableCache {
const EnvOptions& toptions,
uint64_t file_number,
uint64_t file_size,
Table** tableptr = nullptr,
TableReader** table_reader_ptr = nullptr,
bool for_compaction = false);
// If a seek to internal key "k" in specified file finds an entry,


@@ -10,9 +10,8 @@
#include "db/dbformat.h"
#include "db/db_impl.h"
#include "db/table_stats_collector.h"
#include "rocksdb/table_builder.h"
#include "rocksdb/table_stats.h"
#include "table/table.h"
#include "rocksdb/table.h"
#include "util/coding.h"
#include "util/testharness.h"
#include "util/testutil.h"
@@ -21,7 +20,7 @@ namespace rocksdb {
class TableStatsTest {
private:
unique_ptr<Table> table_;
unique_ptr<TableReader> table_reader_;
};
// TODO(kailiu) the following classes should be moved to some more general
@@ -89,21 +88,21 @@ void MakeBuilder(
std::unique_ptr<TableBuilder>* builder) {
writable->reset(new FakeWritableFile);
builder->reset(
new TableBuilder(options, writable->get())
);
options.table_factory->GetTableBuilder(options, writable->get(),
options.compression));
}
void OpenTable(
const Options& options,
const std::string& contents,
std::unique_ptr<Table>* table) {
std::unique_ptr<TableReader>* table_reader) {
std::unique_ptr<RandomAccessFile> file(new FakeRandomeAccessFile(contents));
auto s = Table::Open(
auto s = options.table_factory->GetTableReader(
options,
EnvOptions(),
std::move(file),
contents.size(),
table
table_reader
);
ASSERT_OK(s);
}
@@ -176,9 +175,9 @@ TEST(TableStatsTest, CustomizedTableStatsCollector) {
ASSERT_OK(builder->Finish());
// -- Step 2: Open table
std::unique_ptr<Table> table;
OpenTable(options, writable->contents(), &table);
const auto& stats = table->GetTableStats().user_collected_stats;
std::unique_ptr<TableReader> table_reader;
OpenTable(options, writable->contents(), &table_reader);
const auto& stats = table_reader->GetTableStats().user_collected_stats;
ASSERT_EQ("Rocksdb", stats.at("TableStatsTest"));
@@ -234,9 +233,9 @@ TEST(TableStatsTest, InternalKeyStatsCollector) {
ASSERT_OK(builder->Finish());
std::unique_ptr<Table> table;
OpenTable(options, writable->contents(), &table);
const auto& stats = table->GetTableStats().user_collected_stats;
std::unique_ptr<TableReader> table_reader;
OpenTable(options, writable->contents(), &table_reader);
const auto& stats = table_reader->GetTableStats().user_collected_stats;
uint64_t deleted = 0;
Slice key(stats.at(InternalKeyTableStatsNames::kDeletedKeys));


@@ -19,7 +19,7 @@
#include "db/table_cache.h"
#include "rocksdb/env.h"
#include "rocksdb/merge_operator.h"
#include "rocksdb/table_builder.h"
#include "rocksdb/table.h"
#include "table/merger.h"
#include "table/two_level_iterator.h"
#include "util/coding.h"
@@ -294,11 +294,11 @@ struct Saver {
};
}
// Called from TableCache::Get and InternalGet when file/block in which key may
// exist are not there in TableCache/BlockCache respectively. In this case we
// can't guarantee that key does not exist and are not permitted to do IO to be
// certain. Set the status=kFound and value_found=false to let the caller know
// that key may exist but is not there in memory
// Called from TableCache::Get and Table::Get when file/block in which
// key may exist are not there in TableCache/BlockCache respectively. In this
// case we can't guarantee that key does not exist and are not permitted to do
// IO to be certain. Set the status=kFound and value_found=false to let the
// caller know that key may exist but is not there in memory
static void MarkKeyMayExist(void* arg) {
Saver* s = reinterpret_cast<Saver*>(arg);
s->state = kFound;
@@ -1920,12 +1920,12 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
} else {
// "ikey" falls in the range for this table. Add the
// approximate offset of "ikey" within the table.
Table* tableptr;
TableReader* table_reader_ptr;
Iterator* iter = table_cache_->NewIterator(
ReadOptions(), storage_options_, files[i]->number,
files[i]->file_size, &tableptr);
if (tableptr != nullptr) {
result += tableptr->ApproximateOffsetOf(ikey.Encode());
files[i]->file_size, &table_reader_ptr);
if (table_reader_ptr != nullptr) {
result += table_reader_ptr->ApproximateOffsetOf(ikey.Encode());
}
delete iter;
}