Merge branch 'master' into columnfamilies

Conflicts:
	db/db_impl.cc
	db/db_impl.h
	db/memtable_list.cc
	db/memtable_list.h
	db/version_set.cc
	db/version_set.h
This commit is contained in:
Igor Canadi
2014-02-12 14:01:30 -08:00
53 changed files with 964 additions and 325 deletions

View File

@@ -233,6 +233,30 @@ void BlockBasedTableBuilder::WriteBlock(BlockBuilder* block,
type = kNoCompression;
}
break;
case kLZ4Compression:
if (port::LZ4_Compress(r->options.compression_opts, raw.data(),
raw.size(), compressed) &&
GoodCompressionRatio(compressed->size(), raw.size())) {
block_contents = *compressed;
} else {
// LZ4 not supported, or not good compression ratio, so just
// store uncompressed form
block_contents = raw;
type = kNoCompression;
}
break;
case kLZ4HCCompression:
if (port::LZ4HC_Compress(r->options.compression_opts, raw.data(),
raw.size(), compressed) &&
GoodCompressionRatio(compressed->size(), raw.size())) {
block_contents = *compressed;
} else {
// LZ4HC not supported, or compression ratio not good enough, so just
// store uncompressed form
block_contents = raw;
type = kNoCompression;
}
break;
}
WriteRawBlock(block_contents, type, handle);
r->compressed_output.clear();

View File

@@ -62,7 +62,7 @@ struct BlockBasedTable::Rep {
unique_ptr<Block> index_block;
unique_ptr<FilterBlockReader> filter;
TableProperties table_properties;
std::shared_ptr<const TableProperties> table_properties;
};
BlockBasedTable::~BlockBasedTable() {
@@ -255,9 +255,10 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
meta_iter->Seek(kPropertiesBlock);
if (meta_iter->Valid() && meta_iter->key() == kPropertiesBlock) {
s = meta_iter->status();
TableProperties* table_properties = nullptr;
if (s.ok()) {
s = ReadProperties(meta_iter->value(), rep->file.get(), rep->options.env,
rep->options.info_log.get(), &rep->table_properties);
rep->options.info_log.get(), &table_properties);
}
if (!s.ok()) {
@@ -265,6 +266,8 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
"[Warning] Encountered error while reading data from properties "
"block " + s.ToString();
Log(rep->options.info_log, "%s", err_msg.c_str());
} else {
rep->table_properties.reset(table_properties);
}
}
@@ -339,7 +342,8 @@ void BlockBasedTable::SetupForCompaction() {
compaction_optimized_ = true;
}
const TableProperties& BlockBasedTable::GetTableProperties() {
std::shared_ptr<const TableProperties> BlockBasedTable::GetTableProperties()
const {
return rep_->table_properties;
}

View File

@@ -86,7 +86,7 @@ class BlockBasedTable : public TableReader {
// posix_fadvise
void SetupForCompaction() override;
const TableProperties& GetTableProperties() override;
std::shared_ptr<const TableProperties> GetTableProperties() const override;
~BlockBasedTable();

View File

@@ -10,6 +10,7 @@
#include "table/format.h"
#include <string>
#include <inttypes.h>
#include "port/port.h"
#include "rocksdb/env.h"
@@ -64,7 +65,8 @@ Status Footer::DecodeFrom(Slice* input) {
if (magic != table_magic_number()) {
char buffer[80];
snprintf(buffer, sizeof(buffer) - 1,
"not an sstable (bad magic number --- %lx)", magic);
"not an sstable (bad magic number --- %lx)",
(long)magic);
return Status::InvalidArgument(buffer);
}
} else {
@@ -228,6 +230,28 @@ Status UncompressBlockContents(const char* data, size_t n,
result->heap_allocated = true;
result->cachable = true;
break;
case kLZ4Compression:
ubuf = port::LZ4_Uncompress(data, n, &decompress_size);
static char lz4_corrupt_msg[] =
"LZ4 not supported or corrupted LZ4 compressed block contents";
if (!ubuf) {
return Status::Corruption(lz4_corrupt_msg);
}
result->data = Slice(ubuf, decompress_size);
result->heap_allocated = true;
result->cachable = true;
break;
case kLZ4HCCompression:
ubuf = port::LZ4_Uncompress(data, n, &decompress_size);
static char lz4hc_corrupt_msg[] =
"LZ4HC not supported or corrupted LZ4HC compressed block contents";
if (!ubuf) {
return Status::Corruption(lz4hc_corrupt_msg);
}
result->data = Slice(ubuf, decompress_size);
result->heap_allocated = true;
result->cachable = true;
break;
default:
return Status::Corruption("bad block type");
}

View File

@@ -109,7 +109,7 @@ class Footer {
kEncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8
};
const uint64_t kInvalidTableMagicNumber = 0;
static const uint64_t kInvalidTableMagicNumber = 0;
private:
// Set the table_magic_number only when it was not previously

View File

@@ -133,12 +133,9 @@ bool NotifyCollectTableCollectorsOnFinish(
return all_succeeded;
}
Status ReadProperties(
const Slice& handle_value,
RandomAccessFile* file,
Env* env,
Logger* logger,
TableProperties* table_properties) {
Status ReadProperties(const Slice& handle_value, RandomAccessFile* file,
Env* env, Logger* logger,
TableProperties** table_properties) {
assert(table_properties);
Slice v = handle_value;
@@ -161,18 +158,22 @@ Status ReadProperties(
std::unique_ptr<Iterator> iter(
properties_block.NewIterator(BytewiseComparator()));
auto new_table_properties = new TableProperties();
// All pre-defined properties of type uint64_t
std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
{TablePropertiesNames::kDataSize, &table_properties->data_size},
{TablePropertiesNames::kIndexSize, &table_properties->index_size},
{TablePropertiesNames::kFilterSize, &table_properties->filter_size},
{TablePropertiesNames::kRawKeySize, &table_properties->raw_key_size},
{TablePropertiesNames::kRawValueSize, &table_properties->raw_value_size},
{TablePropertiesNames::kDataSize, &new_table_properties->data_size},
{TablePropertiesNames::kIndexSize, &new_table_properties->index_size},
{TablePropertiesNames::kFilterSize, &new_table_properties->filter_size},
{TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size},
{TablePropertiesNames::kRawValueSize,
&new_table_properties->raw_value_size},
{TablePropertiesNames::kNumDataBlocks,
&table_properties->num_data_blocks},
{TablePropertiesNames::kNumEntries, &table_properties->num_entries},
{TablePropertiesNames::kFormatVersion, &table_properties->format_version},
{TablePropertiesNames::kFixedKeyLen, &table_properties->fixed_key_len}};
&new_table_properties->num_data_blocks},
{TablePropertiesNames::kNumEntries, &new_table_properties->num_entries},
{TablePropertiesNames::kFormatVersion,
&new_table_properties->format_version},
{TablePropertiesNames::kFixedKeyLen,
&new_table_properties->fixed_key_len}, };
std::string last_key;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
@@ -203,24 +204,25 @@ Status ReadProperties(
}
*(pos->second) = val;
} else if (key == TablePropertiesNames::kFilterPolicy) {
table_properties->filter_policy_name = raw_val.ToString();
new_table_properties->filter_policy_name = raw_val.ToString();
} else {
// handle user-collected properties
table_properties->user_collected_properties.insert(
new_table_properties->user_collected_properties.insert(
{key, raw_val.ToString()});
}
}
if (s.ok()) {
*table_properties = new_table_properties;
} else {
delete new_table_properties;
}
return s;
}
Status ReadTableProperties(
RandomAccessFile* file,
uint64_t file_size,
uint64_t table_magic_number,
Env* env,
Logger* info_log,
TableProperties* properties) {
Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size,
uint64_t table_magic_number, Env* env,
Logger* info_log, TableProperties** properties) {
// -- Read metaindex block
Footer footer(table_magic_number);
auto s = ReadFooterFromFile(file, file_size, &footer);

View File

@@ -103,21 +103,20 @@ bool NotifyCollectTableCollectorsOnFinish(
PropertyBlockBuilder* builder);
// Read the properties from the table.
Status ReadProperties(
const Slice& handle_value,
RandomAccessFile* file,
Env* env,
Logger* logger,
TableProperties* table_properties);
// @returns a status to indicate if the operation succeeded. On success,
// *table_properties will point to a heap-allocated TableProperties
// object, otherwise value of `table_properties` will not be modified.
Status ReadProperties(const Slice& handle_value, RandomAccessFile* file,
Env* env, Logger* logger,
TableProperties** table_properties);
// Directly read the properties from the properties block of a plain table.
Status ReadTableProperties(
RandomAccessFile* file,
uint64_t file_size,
uint64_t table_magic_number,
Env* env,
Logger* info_log,
TableProperties* properties);
// @returns a status to indicate if the operation succeeded. On success,
// *properties will point to a heap-allocated TableProperties
// object, otherwise the value of `properties` will not be modified.
Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size,
uint64_t table_magic_number, Env* env,
Logger* info_log, TableProperties** properties);
// Read the magic number of the specified file directly. The magic number
// of a valid sst table is the last 8 bytes of the file.

View File

@@ -87,15 +87,15 @@ PlainTableReader::PlainTableReader(const EnvOptions& storage_options,
const InternalKeyComparator& icomparator,
uint64_t file_size, int bloom_bits_per_key,
double hash_table_ratio,
const TableProperties& table_properties)
const TableProperties* table_properties)
: soptions_(storage_options),
internal_comparator_(icomparator),
file_size_(file_size),
kHashTableRatio(hash_table_ratio),
kBloomBitsPerKey(bloom_bits_per_key),
table_properties_(table_properties),
data_end_offset_(table_properties_.data_size),
user_key_len_(table_properties.fixed_key_len) {}
data_end_offset_(table_properties_->data_size),
user_key_len_(table_properties->fixed_key_len) {}
PlainTableReader::~PlainTableReader() {
delete[] hash_table_;
@@ -117,17 +117,16 @@ Status PlainTableReader::Open(const Options& options,
return Status::NotSupported("File is too large for PlainTableReader!");
}
TableProperties table_properties;
TableProperties* props = nullptr;
auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
options.env, options.info_log.get(),
&table_properties);
options.env, options.info_log.get(), &props);
if (!s.ok()) {
return s;
}
std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader(
soptions, internal_comparator, file_size, bloom_bits_per_key,
hash_table_ratio, table_properties));
std::unique_ptr<PlainTableReader> new_reader(
new PlainTableReader(soptions, internal_comparator, file_size,
bloom_bits_per_key, hash_table_ratio, props));
new_reader->file_ = std::move(file);
new_reader->options_ = options;

View File

@@ -64,13 +64,15 @@ class PlainTableReader: public TableReader {
void SetupForCompaction();
const TableProperties& GetTableProperties() { return table_properties_; }
std::shared_ptr<const TableProperties> GetTableProperties() const {
return table_properties_;
}
PlainTableReader(const EnvOptions& storage_options,
const InternalKeyComparator& internal_comparator,
uint64_t file_size, int bloom_num_bits,
double hash_table_ratio,
const TableProperties& table_properties);
const TableProperties* table_properties);
~PlainTableReader();
private:
@@ -95,7 +97,7 @@ class PlainTableReader: public TableReader {
const int kBloomBitsPerKey;
DynamicBloom* bloom_ = nullptr;
TableProperties table_properties_;
std::shared_ptr<const TableProperties> table_properties_;
const uint32_t data_start_offset_ = 0;
const uint32_t data_end_offset_;
const size_t user_key_len_;

View File

@@ -8,6 +8,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
namespace rocksdb {
@@ -47,7 +48,7 @@ class TableReader {
// posix_fadvise
virtual void SetupForCompaction() = 0;
virtual const TableProperties& GetTableProperties() = 0;
virtual std::shared_ptr<const TableProperties> GetTableProperties() const = 0;
// Calls (*result_handler)(handle_context, ...) repeatedly, starting with
// the entry found after a call to Seek(key), until result_handler returns

View File

@@ -488,30 +488,62 @@ class DBConstructor: public Constructor {
};
// Probes for Snappy support by trying to compress a short sample string with
// the default compression options. Builds without SNAPPY defined always
// report false.
static bool SnappyCompressionSupported() {
#ifndef SNAPPY
  return false;
#else
  Slice sample = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
  std::string compressed;
  const bool supported = port::Snappy_Compress(
      Options().compression_opts, sample.data(), sample.size(), &compressed);
  return supported;
#endif
}
// Probes for zlib support by trying to compress a short sample string with
// the default compression options. Builds without ZLIB defined always
// report false.
static bool ZlibCompressionSupported() {
#ifndef ZLIB
  return false;
#else
  Slice sample = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
  std::string compressed;
  const bool supported = port::Zlib_Compress(
      Options().compression_opts, sample.data(), sample.size(), &compressed);
  return supported;
#endif
}
// Probes for BZip2 support by trying to compress a short sample string with
// the default compression options. The function is always defined, matching
// the Snappy/Zlib helpers, so callers need no #ifdef of their own; builds
// without BZIP2 defined simply report false.
static bool BZip2CompressionSupported() {
#ifdef BZIP2
  std::string out;
  Slice in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
  return port::BZip2_Compress(Options().compression_opts,
                              in.data(), in.size(),
                              &out);
#else
  return false;
#endif
}
// Probes for LZ4 support by trying to compress a short sample string with
// the default compression options. Builds without LZ4 defined always
// report false.
static bool LZ4CompressionSupported() {
#ifndef LZ4
  return false;
#else
  Slice sample = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
  std::string compressed;
  return port::LZ4_Compress(Options().compression_opts, sample.data(),
                            sample.size(), &compressed);
#endif
}
// Probes for LZ4HC support by trying to compress a short sample string with
// the default compression options. Guarded by the same LZ4 macro as the plain
// LZ4 probe; builds without LZ4 defined always report false.
static bool LZ4HCCompressionSupported() {
#ifndef LZ4
  return false;
#else
  Slice sample = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
  std::string compressed;
  return port::LZ4HC_Compress(Options().compression_opts, sample.data(),
                              sample.size(), &compressed);
#endif
}
enum TestType {
BLOCK_BASED_TABLE_TEST,
@@ -539,24 +571,23 @@ static std::vector<TestArgs> GenerateArgList() {
std::vector<int> restart_intervals = {16, 1, 1024};
// Only add compression if it is supported
std::vector<CompressionType> compression_types = {kNoCompression};
#ifdef SNAPPY
std::vector<CompressionType> compression_types;
compression_types.push_back(kNoCompression);
if (SnappyCompressionSupported()) {
compression_types.push_back(kSnappyCompression);
}
#endif
#ifdef ZLIB
if (ZlibCompressionSupported()) {
compression_types.push_back(kZlibCompression);
}
#endif
#ifdef BZIP2
if (BZip2CompressionSupported()) {
compression_types.push_back(kBZip2Compression);
}
#endif
if (LZ4CompressionSupported()) {
compression_types.push_back(kLZ4Compression);
}
if (LZ4HCCompressionSupported()) {
compression_types.push_back(kLZ4HCCompression);
}
for (auto test_type : test_types) {
for (auto reverse_compare : reverse_compare_types) {
@@ -908,6 +939,44 @@ class TableTest {
// Empty fixture classes. Their names are passed as the first argument of the
// TEST(...) macros in this file to group related test cases.
class GeneralTableTest : public TableTest {};
class BlockBasedTableTest : public TableTest {};
class PlainTableTest : public TableTest {};
class TablePropertyTest {};
// This test serves as the living tutorial for the prefix scan of user collected
// properties.
TEST(TablePropertyTest, PrefixScanTest) {
  UserCollectedProperties props{{"num.111.1", "1"},
                                {"num.111.2", "2"},
                                {"num.111.3", "3"},
                                {"num.333.1", "1"},
                                {"num.333.2", "2"},
                                {"num.333.3", "3"},
                                {"num.555.1", "1"},
                                {"num.555.2", "2"},
                                {"num.555.3", "3"}, };

  // Prefixes that exist: start at lower_bound(prefix) and keep walking while
  // the current key still begins with the prefix. Each prefix must yield
  // exactly three entries, suffixed ".1", ".2", ".3" in that order.
  for (const std::string& prefix : {"num.111", "num.333", "num.555"}) {
    int matched = 0;
    auto it = props.lower_bound(prefix);
    while (it != props.end() &&
           it->first.compare(0, prefix.size(), prefix) == 0) {
      ++matched;
      const std::string ordinal = std::to_string(matched);
      const std::string expected_key = prefix + "." + ordinal;
      ASSERT_EQ(expected_key, it->first);
      ASSERT_EQ(ordinal, it->second);
      ++it;
    }
    ASSERT_EQ(3, matched);
  }

  // Prefixes that don't exist: lower_bound() either runs off the end or lands
  // on a key that does not begin with the prefix.
  for (const std::string& prefix :
       {"num.000", "num.222", "num.444", "num.666"}) {
    const auto it = props.lower_bound(prefix);
    const bool has_prefix =
        it != props.end() &&
        it->first.compare(0, prefix.size(), prefix) == 0;
    ASSERT_TRUE(!has_prefix);
  }
}
// This test includes all the basic checks except those for index size and block
// size, which will be conducted in separate unit tests.
@@ -933,7 +1002,7 @@ TEST(BlockBasedTableTest, BasicBlockBasedTableProperties) {
c.Finish(options, GetPlainInternalComparator(options.comparator), &keys,
&kvmap);
auto& props = c.table_reader()->GetTableProperties();
auto& props = *c.table_reader()->GetTableProperties();
ASSERT_EQ(kvmap.size(), props.num_entries);
auto raw_key_size = kvmap.size() * 2ul;
@@ -964,7 +1033,7 @@ TEST(BlockBasedTableTest, FilterPolicyNameProperties) {
c.Finish(options, GetPlainInternalComparator(options.comparator), &keys,
&kvmap);
auto& props = c.table_reader()->GetTableProperties();
auto& props = *c.table_reader()->GetTableProperties();
ASSERT_EQ("rocksdb.BuiltinBloomFilter", props.filter_policy_name);
}
@@ -1006,8 +1075,7 @@ TEST(BlockBasedTableTest, IndexSizeStat) {
c.Finish(options, GetPlainInternalComparator(options.comparator), &ks,
&kvmap);
auto index_size =
c.table_reader()->GetTableProperties().index_size;
auto index_size = c.table_reader()->GetTableProperties()->index_size;
ASSERT_GT(index_size, last_index_size);
last_index_size = index_size;
}
@@ -1032,7 +1100,7 @@ TEST(BlockBasedTableTest, NumBlockStat) {
c.Finish(options, GetPlainInternalComparator(options.comparator), &ks,
&kvmap);
ASSERT_EQ(kvmap.size(),
c.table_reader()->GetTableProperties().num_data_blocks);
c.table_reader()->GetTableProperties()->num_data_blocks);
}
class BlockCacheProperties {
@@ -1238,18 +1306,19 @@ TEST(PlainTableTest, BasicPlainTableProperties) {
StringSource source(sink.contents(), 72242, true);
TableProperties props;
TableProperties* props = nullptr;
auto s = ReadTableProperties(&source, sink.contents().size(),
kPlainTableMagicNumber, Env::Default(), nullptr,
&props);
std::unique_ptr<TableProperties> props_guard(props);
ASSERT_OK(s);
ASSERT_EQ(0ul, props.index_size);
ASSERT_EQ(0ul, props.filter_size);
ASSERT_EQ(16ul * 26, props.raw_key_size);
ASSERT_EQ(28ul * 26, props.raw_value_size);
ASSERT_EQ(26ul, props.num_entries);
ASSERT_EQ(1ul, props.num_data_blocks);
ASSERT_EQ(0ul, props->index_size);
ASSERT_EQ(0ul, props->filter_size);
ASSERT_EQ(16ul * 26, props->raw_key_size);
ASSERT_EQ(28ul * 26, props->raw_value_size);
ASSERT_EQ(26ul, props->num_entries);
ASSERT_EQ(1ul, props->num_data_blocks);
}
TEST(GeneralTableTest, ApproximateOffsetOfPlain) {
@@ -1307,24 +1376,42 @@ static void DoCompressionTest(CompressionType comp) {
}
// Runs DoCompressionTest() once for every compression type whose
// *CompressionSupported() probe succeeds, logging a "skipping" line to stderr
// for each type that is unavailable. The candidates are gathered in a vector
// so the list can grow with the number of supported codecs instead of being
// limited to a fixed-size array.
TEST(GeneralTableTest, ApproximateOffsetOfCompressed) {
  std::vector<CompressionType> compression_state;
  if (!SnappyCompressionSupported()) {
    fprintf(stderr, "skipping snappy compression tests\n");
  } else {
    compression_state.push_back(kSnappyCompression);
  }

  if (!ZlibCompressionSupported()) {
    fprintf(stderr, "skipping zlib compression tests\n");
  } else {
    compression_state.push_back(kZlibCompression);
  }

  // TODO(kailiu) DoCompressionTest() doesn't work with BZip2.
  /*
  if (!BZip2CompressionSupported()) {
    fprintf(stderr, "skipping bzip2 compression tests\n");
  } else {
    compression_state.push_back(kBZip2Compression);
  }
  */

  if (!LZ4CompressionSupported()) {
    fprintf(stderr, "skipping lz4 compression tests\n");
  } else {
    compression_state.push_back(kLZ4Compression);
  }

  if (!LZ4HCCompressionSupported()) {
    fprintf(stderr, "skipping lz4hc compression tests\n");
  } else {
    compression_state.push_back(kLZ4HCCompression);
  }

  for (auto state : compression_state) {
    DoCompressionTest(state);
  }
}