Update src/rocksdb2 to rocksdb-3.5.1:

Merge commit 'c168d54495d7d7b84639514f6443ad99b89ce996' into develop
Vinnie Falco
2014-10-27 11:36:32 -07:00
195 changed files with 4433 additions and 9442 deletions

include/rocksdb/c.h

@@ -537,6 +537,8 @@ extern void rocksdb_options_set_min_partial_merge_operands(
rocksdb_options_t*, uint32_t);
extern void rocksdb_options_set_bloom_locality(
rocksdb_options_t*, uint32_t);
extern void rocksdb_options_set_allow_thread_local(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_inplace_update_support(
rocksdb_options_t*, unsigned char);
extern void rocksdb_options_set_inplace_update_num_locks(
@@ -696,10 +698,6 @@ extern void rocksdb_readoptions_set_fill_cache(
extern void rocksdb_readoptions_set_snapshot(
rocksdb_readoptions_t*,
const rocksdb_snapshot_t*);
extern void rocksdb_readoptions_set_iterate_upper_bound(
rocksdb_readoptions_t*,
const char* key,
size_t keylen);
extern void rocksdb_readoptions_set_read_tier(
rocksdb_readoptions_t*, int);
extern void rocksdb_readoptions_set_tailing(
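The C API mirrors each option setter one-to-one. A minimal sketch of driving the setter added above — assuming the standard C-API lifecycle calls (rocksdb_options_create, rocksdb_open, rocksdb_close) and a placeholder database path:

#include <rocksdb/c.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  rocksdb_options_t* options = rocksdb_options_create();
  rocksdb_options_set_create_if_missing(options, 1);
  /* The setter added by this hunk; nonzero enables thread-local storage. */
  rocksdb_options_set_allow_thread_local(options, 1);

  char* err = NULL;
  rocksdb_t* db = rocksdb_open(options, "/tmp/rocksdb_c_example", &err);
  if (err != NULL) {
    fprintf(stderr, "open failed: %s\n", err);
    free(err);
  } else {
    rocksdb_close(db);
  }
  rocksdb_options_destroy(options);
  return 0;
}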

include/rocksdb/cache.h

@@ -127,6 +127,9 @@ class Cache {
void LRU_Append(Handle* e);
void Unref(Handle* e);
struct Rep;
Rep* rep_;
// No copying allowed
Cache(const Cache&);
void operator=(const Cache&);
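This hunk touches only Cache internals: the LRU bookkeeping and the pre-C++11 no-copy idiom (private, unimplemented copy constructor and assignment). Client code does not subclass Cache; it typically plugs the built-in LRU cache into the table options — a sketch, assuming the 3.5-era BlockBasedTableOptions plumbing:

#include <rocksdb/cache.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>

rocksdb::Options MakeCachedOptions() {
  rocksdb::BlockBasedTableOptions table_options;
  table_options.block_cache = rocksdb::NewLRUCache(64 << 20);  // 64 MB shared block cache
  rocksdb::Options options;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  return options;
}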

include/rocksdb/comparator.h

@@ -62,10 +62,6 @@ class Comparator {
// must not be deleted.
extern const Comparator* BytewiseComparator();
// Return a builtin comparator that uses reverse lexicographic byte-wise
// ordering.
extern const Comparator* ReverseBytewiseComparator();
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_COMPARATOR_H_
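rocksdb-3.5.1 ships only the forward bytewise comparator; ReverseBytewiseComparator is a later addition that this merge strips back out. Using the surviving built-in, as a sketch:

#include <rocksdb/comparator.h>
#include <rocksdb/options.h>

void UseBytewiseOrder(rocksdb::Options* options) {
  // Built-in singleton owned by the library; as the header says, it must
  // not be deleted by the caller.
  options->comparator = rocksdb::BytewiseComparator();
}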

include/rocksdb/db.h

@@ -123,7 +123,7 @@ class DB {
// Open DB with column families.
// db_options specify database specific options
// column_families is the vector of all column families in the database,
// column_families is the vector of all column families in the databse,
// containing column family name and options. You need to open ALL column
// families in the database. To get the list of column families, you can use
// ListColumnFamilies(). Also, you can open only a subset of column families
@@ -359,14 +359,6 @@ class DB {
return CompactRange(DefaultColumnFamily(), begin, end, reduce_level,
target_level, target_path_id);
}
virtual bool SetOptions(ColumnFamilyHandle* column_family,
const std::unordered_map<std::string, std::string>& new_options) {
return true;
}
virtual bool SetOptions(
const std::unordered_map<std::string, std::string>& new_options) {
return SetOptions(DefaultColumnFamily(), new_options);
}
// Number of levels used for this DB.
virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
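SetOptions is on the removed side of this hunk: the dynamic-options API exists in the pre-merge tree but not in rocksdb-3.5.1. Against the pre-merge headers a call would look roughly like the sketch below; the option key is an assumption based on the declared string-map interface, not something this diff confirms.

#include <rocksdb/db.h>
#include <string>
#include <unordered_map>

bool PauseCompactions(rocksdb::DB* db) {
  std::unordered_map<std::string, std::string> new_options = {
      {"disable_auto_compactions", "true"}};  // hypothetical key
  // The single-argument overload forwards to
  // SetOptions(DefaultColumnFamily(), new_options).
  return db->SetOptions(new_options);
}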

include/rocksdb/filter_policy.h

@@ -21,52 +21,11 @@
#define STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
#include <string>
#include <memory>
namespace rocksdb {
class Slice;
// A class that takes a bunch of keys, then generates filter
class FilterBitsBuilder {
public:
virtual ~FilterBitsBuilder() {}
// Add Key to filter, you could use any way to store the key.
// Such as: storing hashes or original keys
// Keys are in sorted order and duplicated keys are possible.
virtual void AddKey(const Slice& key) = 0;
// Generate the filter using the keys that are added
// The return value of this function would be the filter bits,
// The ownership of actual data is set to buf
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
};
// A class that checks if a key can be in filter
// It should be initialized by Slice generated by BitsBuilder
class FilterBitsReader {
public:
virtual ~FilterBitsReader() {}
// Check if the entry match the bits in filter
virtual bool MayMatch(const Slice& entry) = 0;
};
// We add a new format of filter block called full filter block
// This new interface gives you more space of customization
//
// For the full filter block, you can plug in your version by implement
// the FilterBitsBuilder and FilterBitsReader
//
// There are two sets of interface in FilterPolicy
// Set 1: CreateFilter, KeyMayMatch: used for blockbased filter
// Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for
// full filter.
// Set 1 MUST be implemented correctly, Set 2 is optional
// RocksDB would first try using functions in Set 2. if they return nullptr,
// it would use Set 1 instead.
// You can choose filter type in NewBloomFilterPolicy
class FilterPolicy {
public:
virtual ~FilterPolicy();
@@ -92,28 +51,11 @@ class FilterPolicy {
// This method may return true or false if the key was not on the
// list, but it should aim to return false with a high probability.
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
// Get the FilterBitsBuilder, which is ONLY used for full filter block
// It contains interface to take individual key, then generate filter
virtual FilterBitsBuilder* GetFilterBitsBuilder() const {
return nullptr;
}
// Get the FilterBitsReader, which is ONLY used for full filter block
// It contains interface to tell if key can be in filter
// The input slice should NOT be deleted by FilterPolicy
virtual FilterBitsReader* GetFilterBitsReader(const Slice& contents) const {
return nullptr;
}
};
// Return a new filter policy that uses a bloom filter with approximately
// the specified number of bits per key.
//
// bits_per_key: bits per key in bloom filter. A good value for bits_per_key
// the specified number of bits per key. A good value for bits_per_key
// is 10, which yields a filter with ~ 1% false positive rate.
// use_block_based_builder: use block based filter rather than full fiter.
// If you want to builder full filter, it needs to be set to false.
//
// Callers must delete the result after any database that is using the
// result has been closed.
@@ -125,8 +67,8 @@ class FilterPolicy {
// ignores trailing spaces, it would be incorrect to use a
// FilterPolicy (like NewBloomFilterPolicy) that does not ignore
// trailing spaces in keys.
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key,
bool use_block_based_builder = true);
extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key);
}
#endif // STORAGE_ROCKSDB_INCLUDE_FILTER_POLICY_H_
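Net effect of the filter_policy.h hunks: the full-filter machinery (FilterBitsBuilder, FilterBitsReader, and the two-argument NewBloomFilterPolicy) disappears, leaving only the single-argument, block-based bloom filter. A sketch against the 3.5.1 signature, assuming the usual BlockBasedTableOptions plumbing:

#include <rocksdb/filter_policy.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>

rocksdb::Options MakeBloomOptions() {
  rocksdb::BlockBasedTableOptions table_options;
  // 10 bits per key yields roughly a 1% false-positive rate.
  table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10));
  rocksdb::Options options;
  options.table_factory.reset(
      rocksdb::NewBlockBasedTableFactory(table_options));
  return options;
}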

include/rocksdb/immutable_options.h

@@ -1,87 +0,0 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#pragma once
#include <vector>
#include "rocksdb/options.h"
namespace rocksdb {
// ImmutableCFOptions is a data struct used by RocksDB internal. It contains a
// subset of Options that should not be changed during the entire lifetime
// of DB. You shouldn't need to access this data structure unless you are
// implementing a new TableFactory. Raw pointers defined in this struct do
// not have ownership to the data they point to. Options contains shared_ptr
// to these data.
struct ImmutableCFOptions {
explicit ImmutableCFOptions(const Options& options);
CompactionStyle compaction_style;
CompactionOptionsUniversal compaction_options_universal;
CompactionOptionsFIFO compaction_options_fifo;
const SliceTransform* prefix_extractor;
const Comparator* comparator;
MergeOperator* merge_operator;
const CompactionFilter* compaction_filter;
CompactionFilterFactory* compaction_filter_factory;
CompactionFilterFactoryV2* compaction_filter_factory_v2;
Logger* info_log;
Statistics* statistics;
InfoLogLevel info_log_level;
Env* env;
// Allow the OS to mmap file for reading sst tables. Default: false
bool allow_mmap_reads;
// Allow the OS to mmap file for writing. Default: false
bool allow_mmap_writes;
std::vector<DbPath> db_paths;
MemTableRepFactory* memtable_factory;
TableFactory* table_factory;
Options::TablePropertiesCollectorFactories
table_properties_collector_factories;
bool advise_random_on_open;
// This options is required by PlainTableReader. May need to move it
// to PlainTalbeOptions just like bloom_bits_per_key
uint32_t bloom_locality;
bool purge_redundant_kvs_while_flush;
uint32_t min_partial_merge_operands;
bool disable_data_sync;
bool use_fsync;
CompressionType compression;
std::vector<CompressionType> compression_per_level;
CompressionOptions compression_opts;
Options::AccessHint access_hint_on_compaction_start;
int num_levels;
};
} // namespace rocksdb
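immutable_options.h is deleted outright; ImmutableCFOptions is post-3.5.1 infrastructure. On the pre-merge side it is simply snapshotted from a full Options object via the explicit constructor declared above — a sketch:

#include <rocksdb/immutable_options.h>
#include <rocksdb/options.h>

void SnapshotImmutable(const rocksdb::Options& options) {
  // Copies the unchangeable subset; raw pointers inside do not own what
  // they point to -- the originating Options holds the shared_ptrs.
  const rocksdb::ImmutableCFOptions ioptions(options);
  (void)ioptions;  // would be handed to a custom TableFactory
}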

include/rocksdb/options.h

@@ -14,7 +14,6 @@
#include <memory>
#include <vector>
#include <stdint.h>
#include <unordered_map>
#include "rocksdb/version.h"
#include "rocksdb/universal_compaction.h"
@@ -58,7 +57,6 @@ enum CompactionStyle : char {
kCompactionStyleFIFO = 0x2, // FIFO compaction style
};
struct CompactionOptionsFIFO {
// once the total sum of table files reaches this, we will delete the oldest
// table file
@@ -226,12 +224,17 @@ struct ColumnFamilyOptions {
CompressionType compression;
// Different levels can have different compression policies. There
// are cases where most lower levels would like to use quick compression
// algorithms while the higher levels (which have more data) use
// are cases where most lower levels would like to quick compression
// algorithm while the higher levels (which have more data) use
// compression algorithms that have better compression but could
// be slower. This array, if non-empty, should have an entry for
// each level of the database; these override the value specified in
// the previous field 'compression'.
// be slower. This array, if non nullptr, should have an entry for
// each level of the database. This array, if non nullptr, overides the
// value specified in the previous field 'compression'. The caller is
// reponsible for allocating memory and initializing the values in it
// before invoking Open(). The caller is responsible for freeing this
// array and it could be freed anytime after the return from Open().
// This could have been a std::vector but that makes the equivalent
// java/C api hard to construct.
std::vector<CompressionType> compression_per_level;
// different options for compression algorithms
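Whichever comment wording survives, the field is consumed the same way: one entry per level, overriding the scalar 'compression'. A sketch that leaves the first two levels uncompressed for write throughput:

#include <rocksdb/options.h>

void TuneCompression(rocksdb::Options* options) {
  options->compression_per_level.resize(options->num_levels);
  for (int i = 0; i < options->num_levels; ++i) {
    // No compression for the hot, small levels; Snappy below them.
    options->compression_per_level[i] =
        (i < 2) ? rocksdb::kNoCompression : rocksdb::kSnappyCompression;
  }
}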
@@ -288,7 +291,7 @@ struct ColumnFamilyOptions {
// and each file on level-3 will be 200MB.
// by default target_file_size_base is 2MB.
uint64_t target_file_size_base;
int target_file_size_base;
// by default target_file_size_multiplier is 1, which means
// by default files in different levels will have similar size.
int target_file_size_multiplier;
@@ -344,7 +347,9 @@ struct ColumnFamilyOptions {
// Default: 0 (disabled)
double hard_rate_limit;
// DEPRECATED -- this options is no longer used
// Max time a put will be stalled when hard_rate_limit is enforced. If 0, then
// there is no limit.
// Default: 1000
unsigned int rate_limit_delay_max_milliseconds;
// size of one block in arena memory allocation.
@@ -612,7 +617,7 @@ struct DBOptions {
// it does not use any locks to prevent concurrent updates.
std::shared_ptr<Statistics> statistics;
// If true, then the contents of manifest and data files are not synced
// If true, then the contents of data files are not synced
// to stable storage. Their contents remain in the OS buffers till the
// OS decides to flush them. This option is good for bulk-loading
// of data. Once the bulk-loading is complete, please issue a
@@ -784,13 +789,12 @@ struct DBOptions {
// Specify the file access pattern once a compaction is started.
// It will be applied to all input files of a compaction.
// Default: NORMAL
enum AccessHint {
NONE,
NORMAL,
SEQUENTIAL,
WILLNEED
};
AccessHint access_hint_on_compaction_start;
enum {
NONE,
NORMAL,
SEQUENTIAL,
WILLNEED
} access_hint_on_compaction_start;
// Use adaptive mutex, which spins in the user space before resorting
// to kernel. This could reduce context switch when the mutex is not
@@ -799,6 +803,10 @@ struct DBOptions {
// Default: false
bool use_adaptive_mutex;
// Allow RocksDB to use thread local storage to optimize performance.
// Default: true
bool allow_thread_local;
// Create DBOptions with default values for all fields
DBOptions();
// Create DBOptions from Options
@@ -895,18 +903,6 @@ struct ReadOptions {
// ! DEPRECATED
// const Slice* prefix;
// "iterate_upper_bound" defines the extent upto which the forward iterator
// can returns entries. Once the bound is reached, Valid() will be false.
// "iterate_upper_bound" is exclusive ie the bound value is
// not a valid entry. If iterator_extractor is not null, the Seek target
// and iterator_upper_bound need to have the same prefix.
// This is because ordering is not guaranteed outside of prefix domain.
// There is no lower bound on the iterator. If needed, that can be easily
// implemented
//
// Default: nullptr
const Slice* iterate_upper_bound;
// Specify if this read request should process data that ALREADY
// resides on a particular cache. If the required data is not
// found at the specified cache, then Status::Incomplete is returned.
@@ -930,7 +926,6 @@ struct ReadOptions {
: verify_checksums(true),
fill_cache(true),
snapshot(nullptr),
iterate_upper_bound(nullptr),
read_tier(kReadAllTier),
tailing(false),
total_order_seek(false) {}
@@ -938,7 +933,6 @@ struct ReadOptions {
: verify_checksums(cksum),
fill_cache(cache),
snapshot(nullptr),
iterate_upper_bound(nullptr),
read_tier(kReadAllTier),
tailing(false),
total_order_seek(false) {}
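iterate_upper_bound is likewise pre-merge-only; note its initializer also drops out of both ReadOptions constructors above. Against the pre-merge headers, a bounded scan would look roughly like this:

#include <rocksdb/db.h>
#include <rocksdb/options.h>
#include <rocksdb/slice.h>
#include <memory>

void ScanBounded(rocksdb::DB* db) {
  rocksdb::Slice upper("user:z");  // exclusive bound; must outlive the iterator
  rocksdb::ReadOptions read_options;
  read_options.iterate_upper_bound = &upper;
  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(read_options));
  for (it->Seek("user:a"); it->Valid(); it->Next()) {
    // Valid() turns false once the key reaches "user:z".
  }
}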
@@ -1011,12 +1005,6 @@ extern Options GetOptions(size_t total_write_buffer_limit,
int read_amplification_threshold = 8,
int write_amplification_threshold = 32,
uint64_t target_db_size = 68719476736 /* 64GB */);
bool GetOptionsFromStrings(
const Options& base_options,
const std::unordered_map<std::string, std::string>& options_map,
Options* new_options);
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_OPTIONS_H_
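GetOptionsFromStrings also exists only on the pre-merge side. A sketch of the declared signature in use; the option keys shown are assumptions, not confirmed by this diff:

#include <rocksdb/options.h>
#include <string>
#include <unordered_map>

bool BuildOptions(const rocksdb::Options& base, rocksdb::Options* out) {
  std::unordered_map<std::string, std::string> options_map = {
      {"write_buffer_size", "67108864"},  // hypothetical keys
      {"max_write_buffer_number", "4"}};
  return rocksdb::GetOptionsFromStrings(base, options_map, out);
}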

include/rocksdb/statistics.h

@@ -115,7 +115,7 @@ enum Tickers : uint32_t {
// head of the writers queue.
WRITE_DONE_BY_SELF,
WRITE_DONE_BY_OTHER,
WRITE_TIMEDOUT, // Number of writes ending up with timed-out.
WRITE_TIMEDOUT, // Number of writes ending up with timed-out.
WRITE_WITH_WAL, // Number of Write calls that request WAL
COMPACT_READ_BYTES, // Bytes read during compaction
COMPACT_WRITE_BYTES, // Bytes written during compaction
@@ -212,6 +212,7 @@ enum Histograms : uint32_t {
READ_BLOCK_COMPACTION_MICROS,
READ_BLOCK_GET_MICROS,
WRITE_RAW_BLOCK_MICROS,
STALL_L0_SLOWDOWN_COUNT,
STALL_MEMTABLE_COMPACTION_COUNT,
STALL_L0_NUM_FILES_COUNT,
@@ -219,7 +220,6 @@ enum Histograms : uint32_t {
SOFT_RATE_LIMIT_DELAY_COUNT,
NUM_FILES_IN_SINGLE_COMPACTION,
DB_SEEK,
WRITE_STALL,
HISTOGRAM_ENUM_MAX,
};
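These enum values index the Statistics counters, so the insertions and removals above shift every value after them — the headers and the compiled library must agree. Typical consumption, as a sketch:

#include <cstdint>
#include <rocksdb/options.h>
#include <rocksdb/statistics.h>

void ReportWalWrites(rocksdb::Options* options) {
  options->statistics = rocksdb::CreateDBStatistics();
  // ... run the workload, then:
  uint64_t wal_writes =
      options->statistics->getTickerCount(rocksdb::WRITE_WITH_WAL);
  (void)wal_writes;
}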

include/rocksdb/table.h

@@ -23,7 +23,6 @@
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/immutable_options.h"
#include "rocksdb/status.h"
namespace rocksdb {
@@ -251,46 +250,23 @@ struct CuckooTablePropertyNames {
// Denotes if the key sorted in the file is Internal Key (if false)
// or User Key only (if true).
static const std::string kIsLastLevel;
// Indicate if using identity function for the first hash function.
static const std::string kIdentityAsFirstHash;
// Indicate if using module or bit and to calculate hash value
static const std::string kUseModuleHash;
// Fixed user key length
static const std::string kUserKeyLength;
};
struct CuckooTableOptions {
// Determines the utilization of hash tables. Smaller values
// result in larger hash tables with fewer collisions.
double hash_table_ratio = 0.9;
// A property used by builder to determine the depth to go to
// to search for a path to displace elements in case of
// collision. See Builder.MakeSpaceForKey method. Higher
// values result in more efficient hash tables with fewer
// lookups but take more time to build.
uint32_t max_search_depth = 100;
// In case of collision while inserting, the builder
// attempts to insert in the next cuckoo_block_size
// locations before skipping over to the next Cuckoo hash
// function. This makes lookups more cache friendly in case
// of collisions.
uint32_t cuckoo_block_size = 5;
// If this option is enabled, user key is treated as uint64_t and its value
// is used as hash value directly. This option changes builder's behavior.
// Reader ignore this option and behave according to what specified in table
// property.
bool identity_as_first_hash = false;
// If this option is set to true, module is used during hash calculation.
// This often yields better space efficiency at the cost of performance.
// If this optino is set to false, # of entries in table is constrained to be
// power of two, and bit and is used to calculate hash, which is faster in
// general.
bool use_module_hash = true;
};
// Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing
extern TableFactory* NewCuckooTableFactory(
const CuckooTableOptions& table_options = CuckooTableOptions());
// @hash_table_ratio: Determines the utilization of hash tables. Smaller values
// result in larger hash tables with fewer collisions.
// @max_search_depth: A property used by builder to determine the depth to go to
// to search for a path to displace elements in case of
// collision. See Builder.MakeSpaceForKey method. Higher
// values result in more efficient hash tables with fewer
// lookups but take more time to build.
// @cuckoo_block_size: In case of collision while inserting, the builder
// attempts to insert in the next cuckoo_block_size
// locations before skipping over to the next Cuckoo hash
// function. This makes lookups more cache friendly in case
// of collisions.
extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9,
uint32_t max_search_depth = 100, uint32_t cuckoo_block_size = 5);
#endif // ROCKSDB_LITE
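The CuckooTableOptions struct is the newer spelling; rocksdb-3.5.1 takes the same three knobs as positional parameters with identical defaults. A sketch against the 3.5.1 factory:

#include <rocksdb/options.h>
#include <rocksdb/table.h>

void UseCuckooTables(rocksdb::Options* options) {
  // hash_table_ratio, max_search_depth, cuckoo_block_size -- the same
  // values the removed struct carried as defaults.
  options->table_factory.reset(
      rocksdb::NewCuckooTableFactory(0.9, 100, 5));
}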
@@ -317,15 +293,14 @@ class TableFactory {
// and cache the table object returned.
// (1) SstFileReader (for SST Dump) opens the table and dump the table
// contents using the interator of the table.
// ImmutableCFOptions is a subset of Options that can not be altered.
// EnvOptions is a subset of Options that will be used by Env.
// options and soptions are options. options is the general options.
// Multiple configured can be accessed from there, including and not
// limited to block cache and key comparators.
// file is a file handler to handle the file for the table
// file_size is the physical file size of the file
// table_reader is the output table reader
virtual Status NewTableReader(
const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
const Options& options, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const = 0;
@@ -343,17 +318,14 @@ class TableFactory {
// (4) When running Repairer, it creates a table builder to convert logs to
// SST files (In Repairer::ConvertLogToTable() by calling BuildTable())
//
// ImmutableCFOptions is a subset of Options that can not be altered.
// Multiple configured can be acceseed from there, including and not limited
// to compression options. file is a handle of a writable file.
// It is the caller's responsibility to keep the file open and close the file
// after closing the table builder. compression_type is the compression type
// to use in this table.
// options is the general options. Multiple configured can be acceseed from
// there, including and not limited to compression options.
// file is a handle of a writable file. It is the caller's responsibility to
// keep the file open and close the file after closing the table builder.
// compression_type is the compression type to use in this table.
virtual TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const = 0;
const Options& options, const InternalKeyComparator& internal_comparator,
WritableFile* file, CompressionType compression_type) const = 0;
// Sanitizes the specified DB Options.
//

include/rocksdb/utilities/backupable_db.h

@@ -10,10 +10,7 @@
#pragma once
#ifndef ROCKSDB_LITE
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <string>
#include <map>
@@ -130,41 +127,9 @@ struct BackupInfo {
int64_t timestamp;
uint64_t size;
uint32_t number_files;
BackupInfo() {}
BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size,
uint32_t _number_files)
: backup_id(_backup_id), timestamp(_timestamp), size(_size),
number_files(_number_files) {}
};
class BackupStatistics {
public:
BackupStatistics() {
number_success_backup = 0;
number_fail_backup = 0;
}
BackupStatistics(uint32_t _number_success_backup,
uint32_t _number_fail_backup)
: number_success_backup(_number_success_backup),
number_fail_backup(_number_fail_backup) {}
~BackupStatistics() {}
void IncrementNumberSuccessBackup();
void IncrementNumberFailBackup();
uint32_t GetNumberSuccessBackup() const;
uint32_t GetNumberFailBackup() const;
std::string ToString() const;
private:
uint32_t number_success_backup;
uint32_t number_fail_backup;
BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size)
: backup_id(_backup_id), timestamp(_timestamp), size(_size) {}
};
class BackupEngineReadOnly {
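BackupStatistics and BackupInfo::number_files are gone in 3.5.1; BackupInfo reverts to the three-field constructor. Enumerating backups through the 3.5-era BackupableDB wrapper might look like the sketch below — the wrapper API and the backup directory are assumptions from that era's utilities, not shown in this diff:

#include <rocksdb/db.h>
#include <rocksdb/utilities/backupable_db.h>
#include <vector>

void ListBackups(rocksdb::DB* db) {
  rocksdb::BackupableDB backup_db(
      db, rocksdb::BackupableDBOptions("/tmp/rocksdb_backups"));
  backup_db.CreateNewBackup();  // Status return ignored in this sketch
  std::vector<rocksdb::BackupInfo> infos;
  backup_db.GetBackupInfo(&infos);
  for (const rocksdb::BackupInfo& info : infos) {
    // 3.5.1 BackupInfo carries backup_id, timestamp, size -- no number_files.
    (void)info;
  }
}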

include/rocksdb/utilities/write_batch_with_index.h

@@ -11,9 +11,8 @@
#pragma once
#include "rocksdb/comparator.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "rocksdb/slice.h"
#include "rocksdb/write_batch.h"
namespace rocksdb {
@@ -57,14 +56,12 @@ class WBWIIterator {
// A user can call NewIterator() to create an iterator.
class WriteBatchWithIndex {
public:
// backup_index_comparator: the backup comparator used to compare keys
// within the same column family, if column family is not given in the
// interface, or we can't find a column family from the column family handle
// passed in, backup_index_comparator will be used for the column family.
// index_comparator indicates the order when iterating data in the write
// batch. Technically, it doesn't have to be the same as the one used in
// the DB.
// reserved_bytes: reserved bytes in underlying WriteBatch
explicit WriteBatchWithIndex(
const Comparator* backup_index_comparator = BytewiseComparator(),
size_t reserved_bytes = 0);
explicit WriteBatchWithIndex(const Comparator* index_comparator,
size_t reserved_bytes = 0);
virtual ~WriteBatchWithIndex();
WriteBatch* GetWriteBatch();
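Under the 3.5.1 signature the comparator argument is mandatory — the BytewiseComparator() default and the backup_index_comparator semantics arrive later. A sketch; the WBWIIterator SeekToFirst/Entry usage follows that era's header and is an assumption here:

#include <rocksdb/comparator.h>
#include <rocksdb/utilities/write_batch_with_index.h>
#include <memory>

void BuildIndexedBatch() {
  rocksdb::WriteBatchWithIndex batch(rocksdb::BytewiseComparator(), 0);
  batch.Put("key1", "value1");
  // Iterate the buffered writes in comparator order, before any DB write.
  std::unique_ptr<rocksdb::WBWIIterator> it(batch.NewIterator());
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // it->Entry() exposes type, key, and value of each indexed write.
  }
}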

include/rocksdb/version.h

@@ -7,7 +7,7 @@
// Also update Makefile if you change these
#define ROCKSDB_MAJOR 3
#define ROCKSDB_MINOR 5
#define ROCKSDB_PATCH 0
#define ROCKSDB_PATCH 1
// Do not use these. We made the mistake of declaring macros starting with
// double underscore. Now we have to live with our choice. We'll deprecate these
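The patch bump from 0 to 1 is what downstream version guards key off. A compile-time check, as a sketch:

#include <rocksdb/version.h>

// Require at least rocksdb-3.5.1 at compile time.
#if ROCKSDB_MAJOR < 3 || \
    (ROCKSDB_MAJOR == 3 && (ROCKSDB_MINOR < 5 || \
        (ROCKSDB_MINOR == 5 && ROCKSDB_PATCH < 1)))
#error "rocksdb >= 3.5.1 required"
#endif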