From 78ce24a7099f64d04d9d8ea444b23834b4f50da3 Mon Sep 17 00:00:00 2001
From: Kai Liu
Date: Fri, 14 Feb 2014 22:22:06 -0800
Subject: [PATCH 01/14] Fix the lint issues in dev box

Summary:
Owing to the differences between platforms (my MacBook and the dev server),
arc lint throws a fatal error on the dev box. To fix the problem (quickly),
I removed all incompatible function calls.

Test Plan: ran `arc lint` on the dev box and it passed.

Reviewers: igor, yhchiang

CC: leveldb

Differential Revision: https://reviews.facebook.net/D16173
---
 linters/cpp_linter/ArcanistCpplintLinter.php | 31 ++------------------
 1 file changed, 3 insertions(+), 28 deletions(-)

diff --git a/linters/cpp_linter/ArcanistCpplintLinter.php b/linters/cpp_linter/ArcanistCpplintLinter.php
index cb78422483..b9c4137555 100644
--- a/linters/cpp_linter/ArcanistCpplintLinter.php
+++ b/linters/cpp_linter/ArcanistCpplintLinter.php
@@ -19,32 +19,8 @@ final class ArcanistCpplintLinter extends ArcanistLinter {
     return 'cpplint.py';
   }
 
-  public function getLintOptions() {
-    $config = $this->getEngine()->getConfigurationManager();
-    $options = $config->getConfigFromAnySource('lint.cpplint.options', '');
-
-    return $options;
-  }
-
   public function getLintPath() {
-    $config = $this->getEngine()->getConfigurationManager();
-    $prefix = $config->getConfigFromAnySource('lint.cpplint.prefix');
-    $bin = $config->getConfigFromAnySource('lint.cpplint.bin', 'cpplint.py');
-
-    if ($prefix !== null) {
-      if (!Filesystem::pathExists($prefix.'/'.$bin)) {
-        throw new ArcanistUsageException(
-          "Unable to find cpplint.py binary in a specified directory. Make ".
-          "sure that 'lint.cpplint.prefix' and 'lint.cpplint.bin' keys are ".
-          "set correctly. If you'd rather use a copy of cpplint installed ".
-          "globally, you can just remove these keys from your .arcconfig.");
-      }
-
-      $bin = csprintf("%s/%s", $prefix, $bin);
-
-      return $bin;
-    }
-
+    $bin = 'cpplint.py';
     // Search under current dir
     list($err) = exec_manual('which %s/%s', $this->linterDir(), $bin);
     if (!$err) {
@@ -57,7 +33,7 @@ final class ArcanistCpplintLinter extends ArcanistLinter {
       throw new ArcanistUsageException(
         "cpplint.py does not appear to be installed on this system. Install ".
         "it (e.g., with 'wget \"http://google-styleguide.googlecode.com/".
-        "svn/trunk/cpplint/cpplint.py\"') or configure 'lint.cpplint.prefix' ".
+        "svn/trunk/cpplint/cpplint.py\"') ".
        "in your .arcconfig to point to the directory where it resides. ".
        "Also don't forget to chmod a+x cpplint.py!");
     }
@@ -67,10 +43,9 @@ final class ArcanistCpplintLinter extends ArcanistLinter {
 
   public function lintPath($path) {
     $bin = $this->getLintPath();
-    $options = $this->getLintOptions();
 
     $path = $this->rocksdbDir().'/'.$path;
-    $f = new ExecFuture("%C %C $path", $bin, $options);
+    $f = new ExecFuture("%C $path", $bin);
 
     list($err, $stdout, $stderr) = $f->resolve();
 

From 83e7842f80aa96b7782f01ef05caf0f90dfbb736 Mon Sep 17 00:00:00 2001
From: kailiu
Date: Wed, 19 Feb 2014 01:00:54 -0800
Subject: [PATCH 02/14] Improve the check for header guard

Summary:
cpplint.py only recognizes `#ifndef HEADER_GUARD` as a header guard. This
patch enables the check for `#pragma once` as well.

Test Plan: The new arc lint run excludes the false alarm for `#pragma once`.
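For illustration, a hypothetical header (not from this patch) that now passes the check: cpplint.py previously insisted on a classic #ifndef/#define/#endif guard, whereas after this change a file whose first directive is `#pragma once` is treated as already guarded:

    // util/example.h (hypothetical) -- accepted by the updated header-guard check.
    #pragma once

    void ExampleDeclaration();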
Reviewers: dhruba, sdong, igor, haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D16227 --- linters/cpp_linter/cpplint.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/linters/cpp_linter/cpplint.py b/linters/cpp_linter/cpplint.py index 814b44f20d..5f7e169bb1 100755 --- a/linters/cpp_linter/cpplint.py +++ b/linters/cpp_linter/cpplint.py @@ -1420,6 +1420,9 @@ def CheckForHeaderGuard(filename, lines, error): endif = None endif_linenum = 0 for linenum, line in enumerate(lines): + # Already been well guarded, no need for further checking. + if line.strip() == "#pragma once": + return linesplit = line.split() if len(linesplit) >= 2: # find the first occurrence of #ifndef and #define, save arg From e90d3f7752f9e927d6288cc7099c179fa37e87f1 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 19 Feb 2014 10:10:45 -0800 Subject: [PATCH 03/14] First Transaction Logs Should Not Skip Storage Options Given Summary: Currently, the first transaction log file ignore bytes_per_sync and other storage-related options. It is not consistent. Fix it. Test Plan: make all check. See the options set in GDB. Reviewers: haobo, kailiu Reviewed By: haobo CC: igor, ljin, yhchiang, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D16215 --- db/db_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 575307bcea..041c621079 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -3701,7 +3701,7 @@ DB::~DB() { } Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) { *dbptr = nullptr; - EnvOptions soptions; + EnvOptions soptions(options); if (options.block_cache != nullptr && options.no_block_cache) { return Status::InvalidArgument( From d74c9b79ea3328d13bf1e2bae5cdc28a17600e19 Mon Sep 17 00:00:00 2001 From: Thomas Adam Date: Wed, 12 Feb 2014 22:49:00 +0100 Subject: [PATCH 04/14] Enhancements to the API --- db/c.cc | 520 ++++++++++++++++++++++++++++++++++++++++---- include/rocksdb/c.h | 198 +++++++++++++++-- 2 files changed, 657 insertions(+), 61 deletions(-) diff --git a/db/c.cc b/db/c.cc index 68f3613367..f3a200bbf2 100644 --- a/db/c.cc +++ b/db/c.cc @@ -17,11 +17,14 @@ #include "rocksdb/env.h" #include "rocksdb/filter_policy.h" #include "rocksdb/iterator.h" +#include "rocksdb/merge_operator.h" #include "rocksdb/options.h" #include "rocksdb/status.h" #include "rocksdb/write_batch.h" #include "rocksdb/memtablerep.h" #include "rocksdb/universal_compaction.h" +#include "rocksdb/statistics.h" +#include "rocksdb/slice_transform.h" using rocksdb::Cache; using rocksdb::Comparator; @@ -30,8 +33,10 @@ using rocksdb::DB; using rocksdb::Env; using rocksdb::FileLock; using rocksdb::FilterPolicy; +using rocksdb::FlushOptions; using rocksdb::Iterator; using rocksdb::Logger; +using rocksdb::MergeOperator; using rocksdb::NewBloomFilterPolicy; using rocksdb::NewLRUCache; using rocksdb::Options; @@ -40,6 +45,7 @@ using rocksdb::Range; using rocksdb::ReadOptions; using rocksdb::SequentialFile; using rocksdb::Slice; +using rocksdb::SliceTransform; using rocksdb::Snapshot; using rocksdb::Status; using rocksdb::WritableFile; @@ -50,19 +56,20 @@ using std::shared_ptr; extern "C" { -struct rocksdb_t { DB* rep; }; -struct rocksdb_iterator_t { Iterator* rep; }; -struct rocksdb_writebatch_t { WriteBatch rep; }; -struct rocksdb_snapshot_t { const Snapshot* rep; }; -struct rocksdb_readoptions_t { ReadOptions rep; }; -struct rocksdb_writeoptions_t { WriteOptions rep; }; -struct rocksdb_options_t { Options rep; }; -struct 
rocksdb_seqfile_t { SequentialFile* rep; }; -struct rocksdb_randomfile_t { RandomAccessFile* rep; }; -struct rocksdb_writablefile_t { WritableFile* rep; }; -struct rocksdb_filelock_t { FileLock* rep; }; -struct rocksdb_logger_t { shared_ptr rep; }; -struct rocksdb_cache_t { shared_ptr rep; }; +struct rocksdb_t { DB* rep; }; +struct rocksdb_iterator_t { Iterator* rep; }; +struct rocksdb_writebatch_t { WriteBatch rep; }; +struct rocksdb_snapshot_t { const Snapshot* rep; }; +struct rocksdb_flushoptions_t { FlushOptions rep; }; +struct rocksdb_readoptions_t { ReadOptions rep; }; +struct rocksdb_writeoptions_t { WriteOptions rep; }; +struct rocksdb_options_t { Options rep; }; +struct rocksdb_seqfile_t { SequentialFile* rep; }; +struct rocksdb_randomfile_t { RandomAccessFile* rep; }; +struct rocksdb_writablefile_t { WritableFile* rep; }; +struct rocksdb_filelock_t { FileLock* rep; }; +struct rocksdb_logger_t { shared_ptr rep; }; +struct rocksdb_cache_t { shared_ptr rep; }; struct rocksdb_comparator_t : public Comparator { void* state_; @@ -122,7 +129,6 @@ struct rocksdb_filterpolicy_t : public FilterPolicy { size_t len; char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len); dst->append(filter, len); - free(filter); } virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { @@ -131,16 +137,136 @@ struct rocksdb_filterpolicy_t : public FilterPolicy { } }; +struct rocksdb_mergeoperator_t : public MergeOperator { + void* state_; + void (*destructor_)(void*); + const char* (*name_)(void*); + char* (*full_merge_)( + void*, + const char* key, size_t key_length, + const char* existing_value, size_t existing_value_length, + const char* const* operands_list, const size_t* operands_list_length, + int num_operands, + unsigned char* success, size_t* new_value_length); + char* (*partial_merge_)( + void*, + const char* key, size_t key_length, + const char* left_operand, size_t left_operand_length, + const char* right_operand, size_t right_operand_length, + unsigned char* success, size_t* new_value_length); + + virtual ~rocksdb_mergeoperator_t() { + (*destructor_)(state_); + } + + virtual const char* Name() const { + return (*name_)(state_); + } + + virtual bool FullMerge( + const Slice& key, + const Slice* existing_value, + const std::deque& operand_list, + std::string* new_value, + Logger* logger) const { + + size_t n = operand_list.size(); + std::vector operand_pointers(n); + std::vector operand_sizes(n); + for (size_t i = 0; i < n; i++) { + Slice operand(operand_list[i]); + operand_pointers[i] = operand.data(); + operand_sizes[i] = operand.size(); + } + + const char* existing_value_data = nullptr; + size_t existing_value_len = 0; + if (existing_value != nullptr) { + existing_value_data = existing_value->data(); + existing_value_len = existing_value->size(); + } + + unsigned char success; + size_t new_value_len; + char* tmp_new_value = (*full_merge_)( + state_, + key.data(), key.size(), + existing_value_data, existing_value_len, + &operand_pointers[0], &operand_sizes[0], n, + &success, &new_value_len); + new_value->assign(tmp_new_value, new_value_len); + + return success; + } + + virtual bool PartialMerge( + const Slice& key, + const Slice& left_operand, + const Slice& right_operand, + std::string* new_value, + Logger* logger) const { + + unsigned char success; + size_t new_value_len; + char* tmp_new_value = (*partial_merge_)( + state_, + key.data(), key.size(), + left_operand.data(), left_operand.size(), + right_operand.data(), right_operand.size(), + &success, 
&new_value_len); + new_value->assign(tmp_new_value, new_value_len); + + return success; + } +}; + struct rocksdb_env_t { Env* rep; bool is_default; }; +struct rocksdb_slicetransform_t : public SliceTransform { + void* state_; + void (*destructor_)(void*); + const char* (*name_)(void*); + char* (*transform_)( + void*, + const char* key, size_t length, + size_t* dst_length); + unsigned char (*in_domain_)( + void*, + const char* key, size_t length); + unsigned char (*in_range_)( + void*, + const char* key, size_t length); + + virtual ~rocksdb_slicetransform_t() { + (*destructor_)(state_); + } + + virtual const char* Name() const { + return (*name_)(state_); + } + + virtual Slice Transform(const Slice& src) const { + size_t len; + char* dst = (*transform_)(state_, src.data(), src.size(), &len); + return Slice(dst, len); + } + + virtual bool InDomain(const Slice& src) const { + return (*in_domain_)(state_, src.data(), src.size()); + } + + virtual bool InRange(const Slice& src) const { + return (*in_range_)(state_, src.data(), src.size()); + } +}; + struct rocksdb_universal_compaction_options_t { rocksdb::CompactionOptionsUniversal *rep; }; - static bool SaveError(char** errptr, const Status& s) { assert(errptr != NULL); if (s.ok()) { @@ -197,6 +323,15 @@ void rocksdb_delete( SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen))); } +void rocksdb_merge( + rocksdb_t* db, + const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, + const char* val, size_t vallen, + char** errptr) { + SaveError(errptr, + db->rep->Merge(options->rep, Slice(key, keylen), Slice(val, vallen))); +} void rocksdb_write( rocksdb_t* db, @@ -287,6 +422,26 @@ void rocksdb_compact_range( (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL)); } +void rocksdb_flush( + rocksdb_t* db, + const rocksdb_flushoptions_t* options, + char** errptr) { + SaveError(errptr, db->rep->Flush(options->rep)); +} + +void rocksdb_disable_file_deletions( + rocksdb_t* db, + char** errptr) { + SaveError(errptr, db->rep->DisableFileDeletions()); +} + +void rocksdb_enable_file_deletions( + rocksdb_t* db, + unsigned char force, + char** errptr) { + SaveError(errptr, db->rep->EnableFileDeletions(force)); +} + void rocksdb_destroy_db( const rocksdb_options_t* options, const char* name, @@ -365,6 +520,13 @@ void rocksdb_writebatch_put( b->rep.Put(Slice(key, klen), Slice(val, vlen)); } +void rocksdb_writebatch_merge( + rocksdb_writebatch_t* b, + const char* key, size_t klen, + const char* val, size_t vlen) { + b->rep.Merge(Slice(key, klen), Slice(val, vlen)); +} + void rocksdb_writebatch_delete( rocksdb_writebatch_t* b, const char* key, size_t klen) { @@ -409,6 +571,12 @@ void rocksdb_options_set_comparator( opt->rep.comparator = cmp; } +void rocksdb_options_set_merger_operator( + rocksdb_options_t* opt, + rocksdb_mergeoperator_t* merge_operator) { + opt->rep.merge_operator = std::shared_ptr(merge_operator); +} + void rocksdb_options_set_filter_policy( rocksdb_options_t* opt, rocksdb_filterpolicy_t* policy) { @@ -454,6 +622,12 @@ void rocksdb_options_set_cache(rocksdb_options_t* opt, rocksdb_cache_t* c) { } } +void rocksdb_options_set_cache_compressed(rocksdb_options_t* opt, rocksdb_cache_t* c) { + if (c) { + opt->rep.block_cache_compressed = c->rep; + } +} + void rocksdb_options_set_block_size(rocksdb_options_t* opt, size_t s) { opt->rep.block_size = s; } @@ -492,6 +666,10 @@ void rocksdb_options_set_max_grandparent_overlap_factor( opt->rep.max_grandparent_overlap_factor = n; } +void 
rocksdb_options_enable_statistics(rocksdb_options_t* opt) { + opt->rep.statistics = rocksdb::CreateDBStatistics(); +} + void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) { opt->rep.num_levels = n; } @@ -537,6 +715,16 @@ void rocksdb_options_set_compression_options( opt->rep.compression_opts.strategy = strategy; } +void rocksdb_options_set_prefix_extractor( + rocksdb_options_t* opt, rocksdb_slicetransform_t* prefix_extractor) { + opt->rep.prefix_extractor = prefix_extractor; +} + +void rocksdb_options_set_whole_key_filtering( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.whole_key_filtering = v; +} + void rocksdb_options_set_disable_data_sync( rocksdb_options_t* opt, int disable_data_sync) { opt->rep.disableDataSync = disable_data_sync; @@ -557,6 +745,11 @@ void rocksdb_options_set_db_log_dir( opt->rep.db_log_dir = db_log_dir; } +void rocksdb_options_set_wal_dir( + rocksdb_options_t* opt, const char* v) { + opt->rep.wal_dir = v; +} + void rocksdb_options_set_WAL_ttl_seconds(rocksdb_options_t* opt, uint64_t ttl) { opt->rep.WAL_ttl_seconds = ttl; } @@ -566,6 +759,76 @@ void rocksdb_options_set_WAL_size_limit_MB( opt->rep.WAL_size_limit_MB = limit; } +void rocksdb_options_set_manifest_preallocation_size( + rocksdb_options_t* opt, size_t v) { + opt->rep.manifest_preallocation_size = v; +} + +void rocksdb_options_set_purge_redundant_kvs_while_flush( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.purge_redundant_kvs_while_flush = v; +} + +void rocksdb_options_set_allow_os_buffer( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.allow_os_buffer = v; +} + +void rocksdb_options_set_allow_mmap_reads( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.allow_mmap_reads = v; +} + +void rocksdb_options_set_allow_mmap_writes( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.allow_mmap_writes = v; +} + +void rocksdb_options_set_is_fd_close_on_exec( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.is_fd_close_on_exec = v; +} + +void rocksdb_options_set_skip_log_error_on_recovery( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.skip_log_error_on_recovery = v; +} + +void rocksdb_options_set_stats_dump_period_sec( + rocksdb_options_t* opt, unsigned int v) { + opt->rep.stats_dump_period_sec = v; +} + +void rocksdb_options_set_block_size_deviation( + rocksdb_options_t* opt, int v) { + opt->rep.block_size_deviation = v; +} + +void rocksdb_options_set_advise_random_on_open( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.advise_random_on_open = v; +} + +void rocksdb_options_set_use_adaptive_mutex( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.use_adaptive_mutex = v; +} + +void rocksdb_options_set_bytes_per_sync( + rocksdb_options_t* opt, uint64_t v) { + opt->rep.bytes_per_sync = v; +} + +void rocksdb_options_set_filter_deletes( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.filter_deletes = v; +} + +void rocksdb_options_set_max_sequential_skip_in_iterations( + rocksdb_options_t* opt, uint64_t v) { + opt->rep.max_sequential_skip_in_iterations = v; +} + void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t* opt, int n) { opt->rep.max_write_buffer_number = n; } @@ -582,6 +845,56 @@ void rocksdb_options_set_max_background_flushes(rocksdb_options_t* opt, int n) { opt->rep.max_background_flushes = n; } +void rocksdb_options_set_max_log_file_size(rocksdb_options_t* opt, size_t v) { + opt->rep.max_log_file_size = v; +} + +void rocksdb_options_set_log_file_time_to_roll(rocksdb_options_t* opt, size_t v) { + 
opt->rep.log_file_time_to_roll = v; +} + +void rocksdb_options_set_keep_log_file_num(rocksdb_options_t* opt, size_t v) { + opt->rep.keep_log_file_num = v; +} + +void rocksdb_options_set_soft_rate_limit(rocksdb_options_t* opt, double v) { + opt->rep.soft_rate_limit = v; +} + +void rocksdb_options_set_hard_rate_limit(rocksdb_options_t* opt, double v) { + opt->rep.hard_rate_limit = v; +} + +void rocksdb_options_set_rate_limit_delay_max_milliseconds( + rocksdb_options_t* opt, unsigned int v) { + opt->rep.rate_limit_delay_max_milliseconds = v; +} + +void rocksdb_options_set_max_manifest_file_size( + rocksdb_options_t* opt, size_t v) { + opt->rep.max_manifest_file_size = v; +} + +void rocksdb_options_set_no_block_cache( + rocksdb_options_t* opt, unsigned char v) { + opt->rep.no_block_cache = v; +} + +void rocksdb_options_set_table_cache_numshardbits( + rocksdb_options_t* opt, int v) { + opt->rep.table_cache_numshardbits = v; +} + +void rocksdb_options_set_table_cache_remove_scan_count_limit( + rocksdb_options_t* opt, int v) { + opt->rep.table_cache_remove_scan_count_limit = v; +} + +void rocksdb_options_set_arena_block_size( + rocksdb_options_t* opt, size_t v) { + opt->rep.arena_block_size = v; +} + void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t* opt, int disable) { opt->rep.disable_auto_compactions = disable; } @@ -590,6 +903,11 @@ void rocksdb_options_set_disable_seek_compaction(rocksdb_options_t* opt, int dis opt->rep.disable_seek_compaction = disable; } +void rocksdb_options_set_delete_obsolete_files_period_micros( + rocksdb_options_t* opt, uint64_t v) { + opt->rep.delete_obsolete_files_period_micros = v; +} + void rocksdb_options_set_source_compaction_factor( rocksdb_options_t* opt, int n) { opt->rep.expanded_compaction_factor = n; @@ -607,6 +925,21 @@ void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t *opt) { opt->rep.memtable_factory.reset(factory); } +void rocksdb_options_set_memtable_prefix_bloom_bits( + rocksdb_options_t* opt, uint32_t v) { + opt->rep.memtable_prefix_bloom_bits = v; +} + +void rocksdb_options_set_memtable_prefix_bloom_probes( + rocksdb_options_t* opt, uint32_t v) { + opt->rep.memtable_prefix_bloom_probes = v; +} + +void rocksdb_options_set_max_successive_merges( + rocksdb_options_t* opt, size_t v) { + opt->rep.max_successive_merges = v; +} + void rocksdb_options_set_compaction_style(rocksdb_options_t *opt, int style) { opt->rep.compaction_style = static_cast(style); } @@ -617,38 +950,22 @@ void rocksdb_options_set_universal_compaction_options(rocksdb_options_t *opt, ro /* TODO: -merge_operator +DB::OpenForReadOnly +DB::MultiGet +DB::KeyMayExist +DB::GetOptions +DB::GetLiveFiles +DB::GetSortedWalFiles +DB::GetLatestSequenceNumber +DB::GetUpdatesSince +DB::DeleteFile +DB::GetLiveFilesMetaData +DB::GetDbIdentity +DB::RunManualCompaction +custom cache compaction_filter -prefix_extractor -whole_key_filtering max_bytes_for_level_multiplier_additional -delete_obsolete_files_period_micros -max_log_file_size -log_file_time_to_roll -keep_log_file_num -soft_rate_limit -hard_rate_limit -rate_limit_delay_max_milliseconds -max_manifest_file_size -no_block_cache -table_cache_numshardbits -table_cache_remove_scan_count_limit -arena_block_size -manifest_preallocation_size -purge_redundant_kvs_while_flush -allow_os_buffer -allow_mmap_reads -allow_mmap_writes -is_fd_close_on_exec -skip_log_error_on_recovery -stats_dump_period_sec -block_size_deviation -advise_random_on_open access_hint_on_compaction_start -use_adaptive_mutex -bytes_per_sync 
-filter_deletes -max_sequential_skip_in_iterations table_factory table_properties_collectors inplace_update_support @@ -724,6 +1041,36 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(int bits_per_key) { return wrapper; } +rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( + void* state, + void (*destructor)(void*), + char* (*full_merge)( + void*, + const char* key, size_t key_length, + const char* existing_value, size_t existing_value_length, + const char* const* operands_list, const size_t* operands_list_length, + int num_operands, + unsigned char* success, size_t* new_value_length), + char* (*partial_merge)( + void*, + const char* key, size_t key_length, + const char* left_operand, size_t left_operand_length, + const char* right_operand, size_t right_operand_length, + unsigned char* success, size_t* new_value_length), + const char* (*name)(void*)) { + rocksdb_mergeoperator_t* result = new rocksdb_mergeoperator_t; + result->state_ = state; + result->destructor_ = destructor; + result->full_merge_ = full_merge; + result->partial_merge_ = partial_merge; + result->name_ = name; + return result; +} + +void rocksdb_mergeoperator_destroy(rocksdb_mergeoperator_t* merge_operator) { + delete merge_operator; +} + rocksdb_readoptions_t* rocksdb_readoptions_create() { return new rocksdb_readoptions_t; } @@ -743,12 +1090,33 @@ void rocksdb_readoptions_set_fill_cache( opt->rep.fill_cache = v; } +void rocksdb_readoptions_set_prefix_seek( + rocksdb_readoptions_t* opt, unsigned char v) { + opt->rep.prefix_seek = v; +} + void rocksdb_readoptions_set_snapshot( rocksdb_readoptions_t* opt, const rocksdb_snapshot_t* snap) { opt->rep.snapshot = (snap ? snap->rep : NULL); } +void rocksdb_readoptions_set_prefix( + rocksdb_readoptions_t* opt, const char* key, size_t keylen) { + Slice prefix = Slice(key, keylen); + opt->rep.prefix = &prefix; +} + +void rocksdb_readoptions_set_read_tier( + rocksdb_readoptions_t* opt, int v) { + opt->rep.read_tier = static_cast(v); +} + +void rocksdb_readoptions_set_tailing( + rocksdb_readoptions_t* opt, unsigned char v) { + opt->rep.tailing = v; +} + rocksdb_writeoptions_t* rocksdb_writeoptions_create() { return new rocksdb_writeoptions_t; } @@ -767,6 +1135,19 @@ void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t* opt, int disable) } +rocksdb_flushoptions_t* rocksdb_flushoptions_create() { + return new rocksdb_flushoptions_t; +} + +void rocksdb_flushoptions_destroy(rocksdb_flushoptions_t* opt) { + delete opt; +} + +void rocksdb_flushoptions_set_wait( + rocksdb_flushoptions_t* opt, unsigned char v) { + opt->rep.wait = v; +} + rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity) { rocksdb_cache_t* c = new rocksdb_cache_t; c->rep = NewLRUCache(capacity); @@ -797,6 +1178,57 @@ void rocksdb_env_destroy(rocksdb_env_t* env) { delete env; } +rocksdb_slicetransform_t* rocksdb_slicetransform_create( + void* state, + void (*destructor)(void*), + char* (*transform)( + void*, + const char* key, size_t length, + size_t* dst_length), + unsigned char (*in_domain)( + void*, + const char* key, size_t length), + unsigned char (*in_range)( + void*, + const char* key, size_t length), + const char* (*name)(void*)) { + rocksdb_slicetransform_t* result = new rocksdb_slicetransform_t; + result->state_ = state; + result->destructor_ = destructor; + result->transform_ = transform; + result->in_domain_ = in_domain; + result->in_range_ = in_range; + result->name_ = name; + return result; +} + +void rocksdb_slicetransform_destroy(rocksdb_slicetransform_t* st) { + delete st; +} + 
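As an aside (not part of this diff), a minimal C-style sketch of how a client might combine the entry points added by this patch; it also uses pre-existing c.h calls such as rocksdb_writeoptions_create(), rocksdb_readoptions_create(), and rocksdb_create_iterator(), and omits error handling and the open/close boilerplate:

    #include <stddef.h>
    #include "rocksdb/c.h"

    /* Sketch only: set up the prefix extractor added by this patch.
       The options keep a raw pointer to the transform, so it must
       outlive the options object; call this before opening the DB. */
    static void configure_prefix(rocksdb_options_t* opts) {
      rocksdb_slicetransform_t* prefix =
          rocksdb_slicetransform_create_fixed_prefix(8);
      rocksdb_options_set_prefix_extractor(opts, prefix);
    }

    /* Sketch only: exercise the new merge, prefix-seek and flush calls
       on an already opened database. */
    static void use_new_calls(rocksdb_t* db) {
      char* err = NULL;

      /* Apply a merge operand; assumes a merge operator was installed
         via rocksdb_options_set_merger_operator() before opening. */
      rocksdb_writeoptions_t* wopts = rocksdb_writeoptions_create();
      rocksdb_merge(db, wopts, "counter", 7, "+1", 2, &err);

      /* Seek with prefix-aware iteration enabled. */
      rocksdb_readoptions_t* ropts = rocksdb_readoptions_create();
      rocksdb_readoptions_set_prefix_seek(ropts, 1);
      rocksdb_iterator_t* it = rocksdb_create_iterator(db, ropts);
      rocksdb_iter_seek(it, "counter", 7);

      /* Force a memtable flush and wait for it to complete. */
      rocksdb_flushoptions_t* fopts = rocksdb_flushoptions_create();
      rocksdb_flushoptions_set_wait(fopts, 1);
      rocksdb_flush(db, fopts, &err);

      rocksdb_iter_destroy(it);
      rocksdb_flushoptions_destroy(fopts);
      rocksdb_readoptions_destroy(ropts);
      rocksdb_writeoptions_destroy(wopts);
    }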
+rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix(size_t prefixLen) { + struct Wrapper : public rocksdb_slicetransform_t { + const SliceTransform* rep_; + ~Wrapper() { delete rep_; } + const char* Name() const { return rep_->Name(); } + Slice Transform(const Slice& src) const { + return rep_->Transform(src); + } + bool InDomain(const Slice& src) const { + return rep_->InDomain(src); + } + bool InRange(const Slice& src) const { + return rep_->InRange(src); + } + static void DoNothing(void*) { } + }; + Wrapper* wrapper = new Wrapper; + wrapper->rep_ = rocksdb::NewFixedPrefixTransform(prefixLen); + wrapper->state_ = NULL; + wrapper->destructor_ = &Wrapper::DoNothing; + return wrapper; +} + rocksdb_universal_compaction_options_t* rocksdb_universal_compaction_options_create() { rocksdb_universal_compaction_options_t* result = new rocksdb_universal_compaction_options_t; result->rep = new rocksdb::CompactionOptionsUniversal; diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 405b292daa..c2382e1997 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -54,22 +54,25 @@ extern "C" { /* Exported types */ -typedef struct rocksdb_t rocksdb_t; -typedef struct rocksdb_cache_t rocksdb_cache_t; -typedef struct rocksdb_comparator_t rocksdb_comparator_t; -typedef struct rocksdb_env_t rocksdb_env_t; -typedef struct rocksdb_filelock_t rocksdb_filelock_t; -typedef struct rocksdb_filterpolicy_t rocksdb_filterpolicy_t; -typedef struct rocksdb_iterator_t rocksdb_iterator_t; -typedef struct rocksdb_logger_t rocksdb_logger_t; -typedef struct rocksdb_options_t rocksdb_options_t; -typedef struct rocksdb_randomfile_t rocksdb_randomfile_t; -typedef struct rocksdb_readoptions_t rocksdb_readoptions_t; -typedef struct rocksdb_seqfile_t rocksdb_seqfile_t; -typedef struct rocksdb_snapshot_t rocksdb_snapshot_t; -typedef struct rocksdb_writablefile_t rocksdb_writablefile_t; -typedef struct rocksdb_writebatch_t rocksdb_writebatch_t; -typedef struct rocksdb_writeoptions_t rocksdb_writeoptions_t; +typedef struct rocksdb_t rocksdb_t; +typedef struct rocksdb_cache_t rocksdb_cache_t; +typedef struct rocksdb_comparator_t rocksdb_comparator_t; +typedef struct rocksdb_env_t rocksdb_env_t; +typedef struct rocksdb_filelock_t rocksdb_filelock_t; +typedef struct rocksdb_filterpolicy_t rocksdb_filterpolicy_t; +typedef struct rocksdb_flushoptions_t rocksdb_flushoptions_t; +typedef struct rocksdb_iterator_t rocksdb_iterator_t; +typedef struct rocksdb_logger_t rocksdb_logger_t; +typedef struct rocksdb_mergeoperator_t rocksdb_mergeoperator_t; +typedef struct rocksdb_options_t rocksdb_options_t; +typedef struct rocksdb_randomfile_t rocksdb_randomfile_t; +typedef struct rocksdb_readoptions_t rocksdb_readoptions_t; +typedef struct rocksdb_seqfile_t rocksdb_seqfile_t; +typedef struct rocksdb_slicetransform_t rocksdb_slicetransform_t; +typedef struct rocksdb_snapshot_t rocksdb_snapshot_t; +typedef struct rocksdb_writablefile_t rocksdb_writablefile_t; +typedef struct rocksdb_writebatch_t rocksdb_writebatch_t; +typedef struct rocksdb_writeoptions_t rocksdb_writeoptions_t; typedef struct rocksdb_universal_compaction_options_t rocksdb_universal_compaction_options_t; /* DB operations */ @@ -94,6 +97,13 @@ extern void rocksdb_delete( const char* key, size_t keylen, char** errptr); +extern void rocksdb_merge( + rocksdb_t* db, + const rocksdb_writeoptions_t* options, + const char* key, size_t keylen, + const char* val, size_t vallen, + char** errptr); + extern void rocksdb_write( rocksdb_t* db, const 
rocksdb_writeoptions_t* options, @@ -138,6 +148,20 @@ extern void rocksdb_compact_range( const char* start_key, size_t start_key_len, const char* limit_key, size_t limit_key_len); +extern void rocksdb_flush( + rocksdb_t* db, + const rocksdb_flushoptions_t* options, + char** errptr); + +extern void rocksdb_disable_file_deletions( + rocksdb_t* db, + char** errptr); + +extern void rocksdb_enable_file_deletions( + rocksdb_t* db, + unsigned char force, + char** errptr); + /* Management operations */ extern void rocksdb_destroy_db( @@ -172,6 +196,10 @@ extern void rocksdb_writebatch_put( rocksdb_writebatch_t*, const char* key, size_t klen, const char* val, size_t vlen); +extern void rocksdb_writebatch_merge( + rocksdb_writebatch_t*, + const char* key, size_t klen, + const char* val, size_t vlen); extern void rocksdb_writebatch_delete( rocksdb_writebatch_t*, const char* key, size_t klen); @@ -188,6 +216,9 @@ extern void rocksdb_options_destroy(rocksdb_options_t*); extern void rocksdb_options_set_comparator( rocksdb_options_t*, rocksdb_comparator_t*); +extern void rocksdb_options_set_merger_operator( + rocksdb_options_t*, + rocksdb_mergeoperator_t*); extern void rocksdb_options_set_compression_per_level( rocksdb_options_t* opt, int* level_values, @@ -206,10 +237,14 @@ extern void rocksdb_options_set_info_log(rocksdb_options_t*, rocksdb_logger_t*); extern void rocksdb_options_set_write_buffer_size(rocksdb_options_t*, size_t); extern void rocksdb_options_set_max_open_files(rocksdb_options_t*, int); extern void rocksdb_options_set_cache(rocksdb_options_t*, rocksdb_cache_t*); +extern void rocksdb_options_set_cache_compressed(rocksdb_options_t*, rocksdb_cache_t*); extern void rocksdb_options_set_block_size(rocksdb_options_t*, size_t); extern void rocksdb_options_set_block_restart_interval(rocksdb_options_t*, int); extern void rocksdb_options_set_compression_options( rocksdb_options_t*, int, int, int); +extern void rocksdb_options_set_whole_key_filtering(rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_prefix_extractor( + rocksdb_options_t*, rocksdb_slicetransform_t*); extern void rocksdb_options_set_num_levels(rocksdb_options_t*, int); extern void rocksdb_options_set_level0_file_num_compaction_trigger( rocksdb_options_t*, int); @@ -217,23 +252,97 @@ extern void rocksdb_options_set_level0_slowdown_writes_trigger( rocksdb_options_t*, int); extern void rocksdb_options_set_level0_stop_writes_trigger( rocksdb_options_t*, int); +extern void rocksdb_options_set_max_mem_compaction_level( + rocksdb_options_t*, int); extern void rocksdb_options_set_target_file_size_base( rocksdb_options_t*, uint64_t); extern void rocksdb_options_set_target_file_size_multiplier( rocksdb_options_t*, int); +extern void rocksdb_options_set_max_bytes_for_level_base( + rocksdb_options_t*, uint64_t); +extern void rocksdb_options_set_max_bytes_for_level_multiplier( + rocksdb_options_t*, int); +extern void rocksdb_options_set_expanded_compaction_factor( + rocksdb_options_t*, int); +extern void rocksdb_options_set_max_grandparent_overlap_factor( + rocksdb_options_t*, int); +extern void rocksdb_options_enable_statistics(rocksdb_options_t*); + extern void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t*, int); extern void rocksdb_options_set_min_write_buffer_number_to_merge(rocksdb_options_t*, int); extern void rocksdb_options_set_max_background_compactions(rocksdb_options_t*, int); extern void rocksdb_options_set_max_background_flushes(rocksdb_options_t*, int); +extern void 
rocksdb_options_set_max_log_file_size(rocksdb_options_t*, size_t); +extern void rocksdb_options_set_log_file_time_to_roll(rocksdb_options_t*, size_t); +extern void rocksdb_options_set_keep_log_file_num(rocksdb_options_t*, size_t); +extern void rocksdb_options_set_soft_rate_limit(rocksdb_options_t*, double); +extern void rocksdb_options_set_hard_rate_limit(rocksdb_options_t*, double); +extern void rocksdb_options_set_rate_limit_delay_max_milliseconds( + rocksdb_options_t*, unsigned int); +extern void rocksdb_options_set_max_manifest_file_size( + rocksdb_options_t*, size_t); +extern void rocksdb_options_set_no_block_cache( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_table_cache_numshardbits( + rocksdb_options_t*, int); +extern void rocksdb_options_set_table_cache_remove_scan_count_limit( + rocksdb_options_t*, int); +extern void rocksdb_options_set_arena_block_size( + rocksdb_options_t*, size_t); extern void rocksdb_options_set_use_fsync( rocksdb_options_t*, int); +extern void rocksdb_options_set_db_stats_log_interval( + rocksdb_options_t*, int); +extern void rocksdb_options_set_db_log_dir( + rocksdb_options_t*, const char*); +extern void rocksdb_options_set_wal_dir( + rocksdb_options_t*, const char*); +extern void rocksdb_options_set_WAL_ttl_seconds( + rocksdb_options_t*, uint64_t); +extern void rocksdb_options_set_WAL_size_limit_MB( + rocksdb_options_t*, uint64_t); +extern void rocksdb_options_set_manifest_preallocation_size( + rocksdb_options_t*, size_t); +extern void rocksdb_options_set_purge_redundant_kvs_while_flush( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_allow_os_buffer( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_allow_mmap_reads( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_allow_mmap_writes( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_is_fd_close_on_exec( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_skip_log_error_on_recovery( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_stats_dump_period_sec( + rocksdb_options_t*, unsigned int); +extern void rocksdb_options_set_block_size_deviation( + rocksdb_options_t*, int); +extern void rocksdb_options_set_advise_random_on_open( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_use_adaptive_mutex( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_bytes_per_sync( + rocksdb_options_t*, uint64_t); +extern void rocksdb_options_set_filter_deletes( + rocksdb_options_t*, unsigned char); +extern void rocksdb_options_set_max_sequential_skip_in_iterations( + rocksdb_options_t*, uint64_t); extern void rocksdb_options_set_disable_data_sync(rocksdb_options_t*, int); extern void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t*, int); extern void rocksdb_options_set_disable_seek_compaction(rocksdb_options_t*, int); +extern void rocksdb_options_set_delete_obsolete_files_period_micros( + rocksdb_options_t*, uint64_t); extern void rocksdb_options_set_source_compaction_factor(rocksdb_options_t*, int); extern void rocksdb_options_prepare_for_bulk_load(rocksdb_options_t*); extern void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t*); - +extern void rocksdb_options_set_memtable_prefix_bloom_bits( + rocksdb_options_t*, uint32_t); +extern void rocksdb_options_set_memtable_prefix_bloom_probes( + rocksdb_options_t*, uint32_t); +extern void rocksdb_options_set_max_successive_merges( + rocksdb_options_t*, size_t); enum 
{ rocksdb_no_compression = 0, @@ -283,6 +392,27 @@ extern void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t*); extern rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom( int bits_per_key); +/* Merge Operator */ + +extern rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( + void* state, + void (*destructor)(void*), + char* (*full_merge)( + void*, + const char* key, size_t key_length, + const char* existing_value, size_t existing_value_length, + const char* const* operands_list, const size_t* operands_list_length, + int num_operands, + unsigned char* success, size_t* new_value_length), + char* (*partial_merge)( + void*, + const char* key, size_t key_length, + const char* left_operand, size_t left_operand_length, + const char* right_operand, size_t right_operand_length, + unsigned char* success, size_t* new_value_length), + const char* (*name)(void*)); +extern void rocksdb_mergeoperator_destroy(rocksdb_mergeoperator_t*); + /* Read options */ extern rocksdb_readoptions_t* rocksdb_readoptions_create(); @@ -292,9 +422,17 @@ extern void rocksdb_readoptions_set_verify_checksums( unsigned char); extern void rocksdb_readoptions_set_fill_cache( rocksdb_readoptions_t*, unsigned char); +extern void rocksdb_readoptions_set_prefix_seek( + rocksdb_readoptions_t*, unsigned char); extern void rocksdb_readoptions_set_snapshot( rocksdb_readoptions_t*, const rocksdb_snapshot_t*); +extern void rocksdb_readoptions_set_prefix( + rocksdb_readoptions_t*, const char* key, size_t keylen); +extern void rocksdb_readoptions_set_read_tier( + rocksdb_readoptions_t*, int); +extern void rocksdb_readoptions_set_tailing( + rocksdb_readoptions_t*, unsigned char); /* Write options */ @@ -304,6 +442,13 @@ extern void rocksdb_writeoptions_set_sync( rocksdb_writeoptions_t*, unsigned char); extern void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t* opt, int disable); +/* Flush options */ + +extern rocksdb_flushoptions_t* rocksdb_flushoptions_create(); +extern void rocksdb_flushoptions_destroy(rocksdb_flushoptions_t*); +extern void rocksdb_flushoptions_set_wait( + rocksdb_flushoptions_t*, unsigned char); + /* Cache */ extern rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity); @@ -316,6 +461,25 @@ extern void rocksdb_env_set_background_threads(rocksdb_env_t* env, int n); extern void rocksdb_env_set_high_priority_background_threads(rocksdb_env_t* env, int n); extern void rocksdb_env_destroy(rocksdb_env_t*); +/* SliceTransform */ + +extern rocksdb_slicetransform_t* rocksdb_slicetransform_create( + void* state, + void (*destructor)(void*), + char* (*transform)( + void*, + const char* key, size_t length, + size_t* dst_length), + unsigned char (*in_domain)( + void*, + const char* key, size_t length), + unsigned char (*in_range)( + void*, + const char* key, size_t length), + const char* (*name)(void*)); +extern rocksdb_slicetransform_t* rocksdb_slicetransform_create_fixed_prefix(size_t); +extern void rocksdb_slicetransform_destroy(rocksdb_slicetransform_t*); + /* Universal Compaction options */ enum { From 2b205b35d800b7aeb7638514d47ba1cf3da1e2f8 Mon Sep 17 00:00:00 2001 From: Kai Liu Date: Wed, 19 Feb 2014 15:38:57 -0800 Subject: [PATCH 05/14] Disable putting filter block to block cache Summary: This bug caused server crash issues because the filter block is too big and kept purging out of cache. Test Plan: Wrote a new unit tests to make sure it works. 
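As a reference for the counters this test relies on, here is a small standalone helper (hypothetical, not part of the patch) that prints the filter-block cache tickers from a statistics object created with rocksdb::CreateDBStatistics():

    #include <iostream>
    #include "rocksdb/statistics.h"

    // Print the block-cache tickers for filter blocks; BLOCK_CACHE_FILTER_MISS
    // and BLOCK_CACHE_FILTER_HIT are the counters the new test asserts on.
    void DumpFilterBlockCacheStats(rocksdb::Statistics* stats) {
      const auto misses = stats->getTickerCount(rocksdb::BLOCK_CACHE_FILTER_MISS);
      const auto hits = stats->getTickerCount(rocksdb::BLOCK_CACHE_FILTER_HIT);
      std::cout << "filter block cache: " << misses << " misses, "
                << hits << " hits" << std::endl;
    }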
Reviewers: dhruba, haobo, igor, sdong Reviewed By: haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D16221 --- db/plain_table_db_test.cc | 2 - table/block_based_table_reader.cc | 20 ++++++- table/block_based_table_reader.h | 3 + table/table_test.cc | 92 +++++++++++++++++++++++++++---- 4 files changed, 101 insertions(+), 16 deletions(-) diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index 3ad7ce8d7e..85d0478090 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -220,7 +220,6 @@ class TestPlainTableFactory : public PlainTableFactory { size_t index_sparseness = 16) : PlainTableFactory(user_key_len, user_key_len, hash_table_ratio, hash_table_ratio), - user_key_len_(user_key_len), bloom_bits_per_key_(bloom_bits_per_key), hash_table_ratio_(hash_table_ratio), index_sparseness_(index_sparseness), @@ -245,7 +244,6 @@ class TestPlainTableFactory : public PlainTableFactory { } private: - uint32_t user_key_len_; int bloom_bits_per_key_; double hash_table_ratio_; size_t index_sparseness_; diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 88ec655575..4921d28f4d 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -300,7 +300,7 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions, assert(index_block->compressionType() == kNoCompression); rep->index_block.reset(index_block); - // Set index block + // Set filter block if (rep->options.filter_policy) { std::string key = kFilterBlockPrefix; key.append(rep->options.filter_policy->Name()); @@ -681,8 +681,14 @@ Iterator* BlockBasedTable::BlockReader(void* arg, BlockBasedTable::CachableEntry BlockBasedTable::GetFilter(bool no_io) const { - if (!rep_->options.filter_policy || !rep_->options.block_cache) { - return {rep_->filter.get(), nullptr}; + // filter pre-populated + if (rep_->filter != nullptr) { + return {rep_->filter.get(), nullptr /* cache handle */}; + } + + if (rep_->options.filter_policy == nullptr /* do not use filter at all */ || + rep_->options.block_cache == nullptr /* no block cache at all */) { + return {nullptr /* filter */, nullptr /* cache handle */}; } // Fetching from the cache @@ -979,4 +985,12 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) { return result; } +bool BlockBasedTable::TEST_filter_block_preloaded() const { + return rep_->filter != nullptr; +} + +bool BlockBasedTable::TEST_index_block_preloaded() const { + return rep_->index_block != nullptr; +} + } // namespace rocksdb diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index c711e70367..fc584d9ece 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -90,6 +90,9 @@ class BlockBasedTable : public TableReader { ~BlockBasedTable(); + bool TEST_filter_block_preloaded() const; + bool TEST_index_block_preloaded() const; + private: template struct CachableEntry; diff --git a/table/table_test.cc b/table/table_test.cc index bef5caac1d..f2a95966a0 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -29,6 +29,7 @@ #include "rocksdb/memtablerep.h" #include "table/block.h" #include "table/meta_blocks.h" +#include "table/block_based_table_reader.h" #include "table/block_based_table_builder.h" #include "table/block_based_table_factory.h" #include "table/block_based_table_reader.h" @@ -960,6 +961,7 @@ class BlockBasedTableTest : public TableTest {}; class PlainTableTest : public TableTest {}; class TablePropertyTest {}; +/* // This 
test serves as the living tutorial for the prefix scan of user collected // properties. TEST(TablePropertyTest, PrefixScanTest) { @@ -1121,19 +1123,37 @@ TEST(BlockBasedTableTest, NumBlockStat) { ASSERT_EQ(kvmap.size(), c.table_reader()->GetTableProperties()->num_data_blocks); } +*/ -class BlockCacheProperties { +// A simple tool that takes the snapshot of block cache statistics. +class BlockCachePropertiesSnapshot { public: - explicit BlockCacheProperties(Statistics* statistics) { + explicit BlockCachePropertiesSnapshot(Statistics* statistics) { block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS); block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT); index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS); index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT); data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS); data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT); + filter_block_cache_miss = + statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS); + filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT); + } + + void AssertIndexBlockStat(int64_t index_block_cache_miss, + int64_t index_block_cache_hit) { + ASSERT_EQ(index_block_cache_miss, this->index_block_cache_miss); + ASSERT_EQ(index_block_cache_hit, this->index_block_cache_hit); + } + + void AssertFilterBlockStat(int64_t filter_block_cache_miss, + int64_t filter_block_cache_hit) { + ASSERT_EQ(filter_block_cache_miss, this->filter_block_cache_miss); + ASSERT_EQ(filter_block_cache_hit, this->filter_block_cache_hit); } // Check if the fetched props matches the expected ones. + // TODO(kailiu) Use this only when you disabled filter policy! void AssertEqual(int64_t index_block_cache_miss, int64_t index_block_cache_hit, int64_t data_block_cache_miss, int64_t data_block_cache_hit) const { @@ -1154,9 +1174,55 @@ class BlockCacheProperties { int64_t index_block_cache_hit = 0; int64_t data_block_cache_miss = 0; int64_t data_block_cache_hit = 0; + int64_t filter_block_cache_miss = 0; + int64_t filter_block_cache_hit = 0; }; -TEST(BlockBasedTableTest, BlockCacheTest) { +// Make sure, by default, index/filter blocks were pre-loaded (meaning we won't +// use block cache to store them). +TEST(BlockBasedTableTest, BlockCacheDisabledTest) { + Options options; + options.create_if_missing = true; + options.statistics = CreateDBStatistics(); + options.block_cache = NewLRUCache(1024); + std::unique_ptr filter_policy(NewBloomFilterPolicy(10)); + options.filter_policy = filter_policy.get(); + BlockBasedTableOptions table_options; + // Intentionally commented out: table_options.cache_index_and_filter_blocks = + // true; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + std::vector keys; + KVMap kvmap; + + TableConstructor c(BytewiseComparator()); + c.Add("key", "value"); + c.Finish(options, GetPlainInternalComparator(options.comparator), &keys, + &kvmap); + + // preloading filter/index blocks is enabled. + auto reader = dynamic_cast(c.table_reader()); + ASSERT_TRUE(reader->TEST_filter_block_preloaded()); + ASSERT_TRUE(reader->TEST_index_block_preloaded()); + + { + // nothing happens in the beginning + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertIndexBlockStat(0, 0); + props.AssertFilterBlockStat(0, 0); + } + + { + // a hack that just to trigger BlockBasedTable::GetFilter. 
+ reader->Get(ReadOptions(), "non-exist-key", nullptr, nullptr, nullptr); + BlockCachePropertiesSnapshot props(options.statistics.get()); + props.AssertIndexBlockStat(0, 0); + props.AssertFilterBlockStat(0, 0); + } +} + +// Due to the difficulities of the intersaction between statistics, this test +// only tests the case when "index block is put to block cache" +TEST(BlockBasedTableTest, FilterBlockInBlockCache) { // -- Table construction Options options; options.create_if_missing = true; @@ -1174,6 +1240,10 @@ TEST(BlockBasedTableTest, BlockCacheTest) { c.Add("key", "value"); c.Finish(options, GetPlainInternalComparator(options.comparator), &keys, &kvmap); + // preloading filter/index blocks is prohibited. + auto reader = dynamic_cast(c.table_reader()); + ASSERT_TRUE(!reader->TEST_filter_block_preloaded()); + ASSERT_TRUE(!reader->TEST_index_block_preloaded()); // -- PART 1: Open with regular block cache. // Since block_cache is disabled, no cache activities will be involved. @@ -1181,7 +1251,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // At first, no block will be accessed. { - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); // index will be added to block cache. props.AssertEqual(1, // index block miss 0, 0, 0); @@ -1190,7 +1260,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // Only index block will be accessed { iter.reset(c.NewIterator()); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); // NOTE: to help better highlight the "detla" of each ticker, I use // + to indicate the increment of changed // value; other numbers remain the same. @@ -1201,7 +1271,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // Only data block will be accessed { iter->SeekToFirst(); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1, 1, 0 + 1, // data block miss 0); } @@ -1210,7 +1280,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { { iter.reset(c.NewIterator()); iter->SeekToFirst(); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1, 1 + 1, /* index block hit */ 1, 0 + 1 /* data block hit */); } @@ -1226,7 +1296,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { iter.reset(c.NewIterator()); iter->SeekToFirst(); ASSERT_EQ("key", iter->key().ToString()); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); // Nothing is affected at all props.AssertEqual(0, 0, 0, 0); } @@ -1237,7 +1307,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { options.block_cache = NewLRUCache(1); c.Reopen(options); { - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1, // index block miss 0, 0, 0); } @@ -1248,7 +1318,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // It first cache index block then data block. But since the cache size // is only 1, index block will be purged after data block is inserted. iter.reset(c.NewIterator()); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(1 + 1, // index block miss 0, 0, // data block miss 0); @@ -1258,7 +1328,7 @@ TEST(BlockBasedTableTest, BlockCacheTest) { // SeekToFirst() accesses data block. 
With similar reason, we expect data // block's cache miss. iter->SeekToFirst(); - BlockCacheProperties props(options.statistics.get()); + BlockCachePropertiesSnapshot props(options.statistics.get()); props.AssertEqual(2, 0, 0 + 1, // data block miss 0); } From b2d29675c8ef1ebf5c860a3447e592152638f4e0 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 19 Feb 2014 16:50:00 -0800 Subject: [PATCH 06/14] Add a test in prefix_test to verify correctness of results Summary: Add a test to verify HashLinkList and HashSkipList (mainly for the former one) returns the correct results when inserting the same bucket in the different orders. Some other changes: (1) add the test to test list (2) fix compile error (3) add header Test Plan: ./prefix_test Reviewers: haobo, kailiu Reviewed By: haobo CC: igor, yhchiang, i.am.jin.lei, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D16143 --- Makefile | 1 + db/prefix_test.cc | 232 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 223 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 0fe7c04fba..5a627a0174 100644 --- a/Makefile +++ b/Makefile @@ -77,6 +77,7 @@ TESTS = \ redis_test \ reduce_levels_test \ plain_table_db_test \ + prefix_test \ simple_table_db_test \ skiplist_test \ stringappend_test \ diff --git a/db/prefix_test.cc b/db/prefix_test.cc index ca00c31b35..c43ba5c4dd 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -1,3 +1,8 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + #include #include #include @@ -6,6 +11,8 @@ #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/perf_context.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/memtablerep.h" #include "util/histogram.h" #include "util/stop_watch.h" #include "util/testharness.h" @@ -97,6 +104,36 @@ class TestKeyComparator : public Comparator { }; +void PutKey(DB* db, WriteOptions write_options, uint64_t prefix, + uint64_t suffix, const Slice& value) { + TestKey test_key(prefix, suffix); + Slice key = TestKeyToSlice(test_key); + ASSERT_OK(db->Put(write_options, key, value)); +} + +void SeekIterator(Iterator* iter, uint64_t prefix, uint64_t suffix) { + TestKey test_key(prefix, suffix); + Slice key = TestKeyToSlice(test_key); + iter->Seek(key); +} + +const std::string kNotFoundResult = "NOT_FOUND"; + +std::string Get(DB* db, const ReadOptions& read_options, uint64_t prefix, + uint64_t suffix) { + TestKey test_key(prefix, suffix); + Slice key = TestKeyToSlice(test_key); + + std::string result; + Status s = db->Get(read_options, key, &result); + if (s.IsNotFound()) { + result = kNotFoundResult; + } else if (!s.ok()) { + result = s.ToString(); + } + return result; +} + class PrefixTest { public: std::shared_ptr OpenDb() { @@ -116,7 +153,11 @@ class PrefixTest { return std::shared_ptr(db); } - bool NextOptions() { + void FirstOption() { + option_config_ = kBegin; + } + + bool NextOptions(int bucket_count) { // skip some options option_config_++; if (option_config_ < kEnd) { @@ -124,15 +165,12 @@ class PrefixTest { options.prefix_extractor = prefix_extractor; switch(option_config_) { case kHashSkipList: - options.memtable_factory.reset( - NewHashSkipListRepFactory(options.prefix_extractor, - FLAGS_bucket_count, - FLAGS_skiplist_height)); + 
options.memtable_factory.reset(NewHashSkipListRepFactory( + options.prefix_extractor, bucket_count, FLAGS_skiplist_height)); return true; case kHashLinkList: - options.memtable_factory.reset( - NewHashLinkListRepFactory(options.prefix_extractor, - FLAGS_bucket_count)); + options.memtable_factory.reset(NewHashLinkListRepFactory( + options.prefix_extractor, bucket_count)); return true; default: return false; @@ -158,8 +196,182 @@ class PrefixTest { Options options; }; +TEST(PrefixTest, TestResult) { + for (int num_buckets = 1; num_buckets <= 2; num_buckets++) { + FirstOption(); + while (NextOptions(num_buckets)) { + std::cout << "*** Mem table: " << options.memtable_factory->Name() + << " number of buckets: " << num_buckets + << std::endl; + DestroyDB(kDbName, Options()); + auto db = OpenDb(); + WriteOptions write_options; + ReadOptions read_options; + read_options.prefix_seek = true; + + // 1. Insert one row. + Slice v16("v16"); + PutKey(db.get(), write_options, 1, 6, v16); + std::unique_ptr iter(db->NewIterator(read_options)); + SeekIterator(iter.get(), 1, 6); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + iter->Next(); + ASSERT_TRUE(!iter->Valid()); + + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(!iter->Valid()); + + ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); + ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 5)); + ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 1, 7)); + ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 0, 6)); + ASSERT_EQ(kNotFoundResult, Get(db.get(), read_options, 2, 6)); + + // 2. Insert an entry for the same prefix as the last entry in the bucket. + Slice v17("v17"); + PutKey(db.get(), write_options, 1, 7, v17); + iter.reset(db->NewIterator(read_options)); + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + SeekIterator(iter.get(), 1, 6); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + iter->Next(); + ASSERT_TRUE(!iter->Valid()); + + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(!iter->Valid()); + + // 3. Insert an entry for the same prefix as the head of the bucket. + Slice v15("v15"); + PutKey(db.get(), write_options, 1, 5, v15); + iter.reset(db->NewIterator(read_options)); + + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + + ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5)); + ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); + ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7)); + + // 4. 
Insert an entry with a larger prefix + Slice v22("v22"); + PutKey(db.get(), write_options, 2, 2, v22); + iter.reset(db->NewIterator(read_options)); + + SeekIterator(iter.get(), 2, 2); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v22 == iter->value()); + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v22 == iter->value()); + + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + // 5. Insert an entry with a smaller prefix + Slice v02("v02"); + PutKey(db.get(), write_options, 0, 2, v02); + iter.reset(db->NewIterator(read_options)); + + SeekIterator(iter.get(), 0, 2); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v02 == iter->value()); + SeekIterator(iter.get(), 0, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v02 == iter->value()); + + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v22 == iter->value()); + + SeekIterator(iter.get(), 1, 5); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + // 6. Insert to the beginning and the end of the first prefix + Slice v13("v13"); + Slice v18("v18"); + PutKey(db.get(), write_options, 1, 3, v13); + PutKey(db.get(), write_options, 1, 8, v18); + iter.reset(db->NewIterator(read_options)); + SeekIterator(iter.get(), 1, 7); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + + SeekIterator(iter.get(), 1, 3); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v13 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v15 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v16 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v17 == iter->value()); + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v18 == iter->value()); + + SeekIterator(iter.get(), 0, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v02 == iter->value()); + + SeekIterator(iter.get(), 2, 0); + ASSERT_TRUE(iter->Valid()); + ASSERT_TRUE(v22 == iter->value()); + + ASSERT_EQ(v22.ToString(), Get(db.get(), read_options, 2, 2)); + ASSERT_EQ(v02.ToString(), Get(db.get(), read_options, 0, 2)); + ASSERT_EQ(v13.ToString(), Get(db.get(), read_options, 1, 3)); + ASSERT_EQ(v15.ToString(), Get(db.get(), read_options, 1, 5)); + ASSERT_EQ(v16.ToString(), Get(db.get(), read_options, 1, 6)); + ASSERT_EQ(v17.ToString(), Get(db.get(), read_options, 1, 7)); + ASSERT_EQ(v18.ToString(), Get(db.get(), read_options, 1, 8)); + } + } +} + TEST(PrefixTest, DynamicPrefixIterator) { - while (NextOptions()) { + while (NextOptions(FLAGS_bucket_count)) { std::cout << "*** Mem table: " << options.memtable_factory->Name() << std::endl; DestroyDB(kDbName, Options()); @@ -260,7 +472,7 @@ TEST(PrefixTest, DynamicPrefixIterator) { } TEST(PrefixTest, PrefixHash) { - while (NextOptions()) { + while (NextOptions(FLAGS_bucket_count)) { std::cout << "*** Mem table: " << options.memtable_factory->Name() << std::endl; DestroyDB(kDbName, Options()); From 15ac5ad1f613d251ea79cf00b242b1e47106e6d0 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Thu, 20 Feb 2014 10:55:54 -0800 Subject: [PATCH 07/14] Update to CONTRIBUTING.md --- CONTRIBUTING.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3a17a883f3..7a94c7f69c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,6 
+10,10 @@ the CLA and we can cross-check with your GitHub username. Complete your CLA here: +If you don't have a Facebook account, we can send you a PDF that you can +sign offline. Send us an e-mail or create a new github issue to +request the CLA in PDF format. + ## License By contributing to RocksDB, you agree that your contributions will be From d45d17b2a3a6fe6e456989cbeb11a666ddc3b42d Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Thu, 20 Feb 2014 12:47:05 -0800 Subject: [PATCH 08/14] allow lambda function syntax in cpplint Summary: as title Test Plan: arc lint Reviewers: kailiu Reviewed By: kailiu CC: leveldb Differential Revision: https://reviews.facebook.net/D16251 --- linters/cpp_linter/cpplint.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/linters/cpp_linter/cpplint.py b/linters/cpp_linter/cpplint.py index 5f7e169bb1..d264b00da0 100755 --- a/linters/cpp_linter/cpplint.py +++ b/linters/cpp_linter/cpplint.py @@ -3104,6 +3104,11 @@ def CheckBraces(filename, clean_lines, linenum, error): 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or Search(r'\s+=\s*$', line_prefix)): match = None + # Whitelist lambda function definition which also requires a ";" after + # closing brace + if match: + if Match(r'^.*\[.*\]\s*(.*\)\s*)\{', line): + match = None else: # Try matching cases 2-3. From 68248a2ac5e7c3b8dffbc14e9493447ac8819271 Mon Sep 17 00:00:00 2001 From: Thomas Adam Date: Sun, 23 Feb 2014 17:58:11 +0100 Subject: [PATCH 09/14] added a delete method for custom filter policy and merge operator to make it possible to override the cleanup behaviour of the return value --- db/c.cc | 33 +++++++++++++++++++++++++++++++++ include/rocksdb/c.h | 6 ++++++ 2 files changed, 39 insertions(+) diff --git a/db/c.cc b/db/c.cc index f3a200bbf2..2d4f491889 100644 --- a/db/c.cc +++ b/db/c.cc @@ -110,6 +110,9 @@ struct rocksdb_filterpolicy_t : public FilterPolicy { void*, const char* key, size_t length, const char* filter, size_t filter_length); + void (*delete_filter_)( + void*, + const char* filter, size_t filter_length); virtual ~rocksdb_filterpolicy_t() { (*destructor_)(state_); @@ -129,6 +132,12 @@ struct rocksdb_filterpolicy_t : public FilterPolicy { size_t len; char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len); dst->append(filter, len); + + if (delete_filter_ != nullptr) { + (*delete_filter_)(state_, filter, len); + } else { + free(filter); + } } virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { @@ -154,6 +163,9 @@ struct rocksdb_mergeoperator_t : public MergeOperator { const char* left_operand, size_t left_operand_length, const char* right_operand, size_t right_operand_length, unsigned char* success, size_t* new_value_length); + void (*delete_value_)( + void*, + const char* value, size_t value_length); virtual ~rocksdb_mergeoperator_t() { (*destructor_)(state_); @@ -196,6 +208,12 @@ struct rocksdb_mergeoperator_t : public MergeOperator { &success, &new_value_len); new_value->assign(tmp_new_value, new_value_len); + if (delete_value_ != nullptr) { + (*delete_value_)(state_, tmp_new_value, new_value_len); + } else { + free(tmp_new_value); + } + return success; } @@ -216,6 +234,12 @@ struct rocksdb_mergeoperator_t : public MergeOperator { &success, &new_value_len); new_value->assign(tmp_new_value, new_value_len); + if (delete_value_ != nullptr) { + (*delete_value_)(state_, tmp_new_value, new_value_len); + } else { + free(tmp_new_value); + } + return success; } }; @@ -1004,12 +1028,16 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create( void*, const char* 
key, size_t length, const char* filter, size_t filter_length), + void (*delete_filter)( + void*, + const char* filter, size_t filter_length), const char* (*name)(void*)) { rocksdb_filterpolicy_t* result = new rocksdb_filterpolicy_t; result->state_ = state; result->destructor_ = destructor; result->create_ = create_filter; result->key_match_ = key_may_match; + result->delete_filter_ = delete_filter; result->name_ = name; return result; } @@ -1037,6 +1065,7 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(int bits_per_key) { Wrapper* wrapper = new Wrapper; wrapper->rep_ = NewBloomFilterPolicy(bits_per_key); wrapper->state_ = NULL; + wrapper->delete_filter_ = NULL; wrapper->destructor_ = &Wrapper::DoNothing; return wrapper; } @@ -1057,12 +1086,16 @@ rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( const char* left_operand, size_t left_operand_length, const char* right_operand, size_t right_operand_length, unsigned char* success, size_t* new_value_length), + void (*delete_value)( + void*, + const char* value, size_t value_length), const char* (*name)(void*)) { rocksdb_mergeoperator_t* result = new rocksdb_mergeoperator_t; result->state_ = state; result->destructor_ = destructor; result->full_merge_ = full_merge; result->partial_merge_ = partial_merge; + result->delete_value_ = delete_value; result->name_ = name; return result; } diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index c2382e1997..2157cb3e03 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -386,6 +386,9 @@ extern rocksdb_filterpolicy_t* rocksdb_filterpolicy_create( void*, const char* key, size_t length, const char* filter, size_t filter_length), + void (*delete_filter)( + void*, + const char* filter, size_t filter_length), const char* (*name)(void*)); extern void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t*); @@ -410,6 +413,9 @@ extern rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( const char* left_operand, size_t left_operand_length, const char* right_operand, size_t right_operand_length, unsigned char* success, size_t* new_value_length), + void (*delete_value)( + void*, + const char* value, size_t value_length), const char* (*name)(void*)); extern void rocksdb_mergeoperator_destroy(rocksdb_mergeoperator_t*); From ce2b1f7b445e49e5a6a9fbeb18db2ec55ac031c5 Mon Sep 17 00:00:00 2001 From: Thomas Adam Date: Sun, 23 Feb 2014 17:58:38 +0100 Subject: [PATCH 10/14] added a test case for custom merge operator --- db/c_test.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/db/c_test.c b/db/c_test.c index 8c5e8e5348..f4e5568fac 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -154,6 +154,37 @@ unsigned char FilterKeyMatch( return fake_filter_result; } +// Custom merge operator +static void MergeOperatorDestroy(void* arg) { } +static const char* MergeOperatorName(void* arg) { + return "TestMergeOperator"; +} +static char* MergeOperatorFullMerge( + void* arg, + const char* key, size_t key_length, + const char* existing_value, size_t existing_value_length, + const char* const* operands_list, const size_t* operands_list_length, + int num_operands, + unsigned char* success, size_t* new_value_length) { + *new_value_length = 4; + *success = 1; + char* result = malloc(4); + memcpy(result, "fake", 4); + return result; +} +static char* MergeOperatorPartialMerge( + void* arg, + const char* key, size_t key_length, + const char* left_operand, size_t left_operand_length, + const char* right_operand, size_t right_operand_length, + 
unsigned char* success, size_t* new_value_length) { + *new_value_length = 4; + *success = 1; + char* result = malloc(4); + memcpy(result, "fake", 4); + return result; +} + int main(int argc, char** argv) { rocksdb_t* db; rocksdb_comparator_t* cmp; @@ -342,7 +373,7 @@ int main(int argc, char** argv) { rocksdb_filterpolicy_t* policy; if (run == 0) { policy = rocksdb_filterpolicy_create( - NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName); + NULL, FilterDestroy, FilterCreate, FilterKeyMatch, NULL, FilterName); } else { policy = rocksdb_filterpolicy_create_bloom(10); } @@ -376,6 +407,34 @@ int main(int argc, char** argv) { rocksdb_filterpolicy_destroy(policy); } + StartPhase("merge_operator"); + { + rocksdb_mergeoperator_t* merge_operator; + merge_operator = rocksdb_mergeoperator_create( + NULL, MergeOperatorDestroy, MergeOperatorFullMerge, + MergeOperatorPartialMerge, NULL, MergeOperatorName); + // Create new database + rocksdb_close(db); + rocksdb_destroy_db(options, dbname, &err); + rocksdb_options_set_merger_operator(options, merge_operator); + db = rocksdb_open(options, dbname, &err); + CheckNoError(err); + rocksdb_put(db, woptions, "foo", 3, "foovalue", 8, &err); + CheckNoError(err); + CheckGet(db, roptions, "foo", "foovalue"); + rocksdb_merge(db, woptions, "foo", 3, "barvalue", 8, &err); + CheckNoError(err); + CheckGet(db, roptions, "foo", "fake"); + + // Merge of a non-existing value + rocksdb_merge(db, woptions, "bar", 3, "barvalue", 8, &err); + CheckNoError(err); + CheckGet(db, roptions, "bar", "fake"); + + rocksdb_options_set_merger_operator(options, NULL); + rocksdb_mergeoperator_destroy(merge_operator); + } + StartPhase("cleanup"); rocksdb_close(db); rocksdb_options_destroy(options); From c9244dcba641648f63327429fe84324aa08331c4 Mon Sep 17 00:00:00 2001 From: Kai Liu Date: Mon, 24 Feb 2014 12:29:26 -0800 Subject: [PATCH 11/14] Update the instruction to build shared library --- INSTALL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL.md b/INSTALL.md index a63b9b15cc..472fd23310 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -71,7 +71,7 @@ libraries. You are on your own. `make clean; make` will compile librocksdb.a (RocskDB static library) and all the unit tests. You can run all unit tests with `make check`. -For shared library builds, exec `make librocksdb.so` instead. +For shared library builds, exec `make shared_lib` instead. If you followed the above steps and your compile or unit tests fail, please submit an issue: (https://github.com/facebook/rocksdb/issues) From 01c27be5fb42524c5052b4b4a23e05501e1d1421 Mon Sep 17 00:00:00 2001 From: sdong Date: Wed, 19 Feb 2014 14:55:34 -0800 Subject: [PATCH 12/14] A simple benchmark to measure WAL append latency Summary: A simple benchmark that simulates WAL append. It can be used to test different platform/file system's performance on WAL. Test Plan: run it. 
Reviewers: haobo, kailiu

Reviewed By: haobo

CC: igor, dhruba, i.am.jin.lei, yhchiang, leveldb, nkg-

Differential Revision: https://reviews.facebook.net/D16239
---
 Makefile                | 3 ++
 util/log_write_bench.cc | 69 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 util/log_write_bench.cc

diff --git a/Makefile b/Makefile
index 5a627a0174..3be3c3d081 100644
--- a/Makefile
+++ b/Makefile
@@ -285,6 +285,9 @@ crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
 db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
 
+log_write_bench: util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg
+
 plain_table_db_test: db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
 
diff --git a/util/log_write_bench.cc b/util/log_write_bench.cc
new file mode 100644
index 0000000000..642d4e8328
--- /dev/null
+++ b/util/log_write_bench.cc
@@ -0,0 +1,69 @@
+// Copyright (c) 2013, Facebook, Inc. All rights reserved.
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree. An additional grant
+// of patent rights can be found in the PATENTS file in the same directory.
+
+#include <gflags/gflags.h>
+
+#include "rocksdb/env.h"
+#include "util/histogram.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+
+// A simple benchmark to simulate transactional logs
+
+DEFINE_int32(num_records, 6000, "Size of each record.");
+DEFINE_int32(record_size, 249, "Size of each record.");
+DEFINE_int32(record_interval, 10000, "Interval between records (microSec)");
+DEFINE_int32(bytes_per_sync, 0, "bytes_per_sync parameter in EnvOptions");
+DEFINE_bool(enable_sync, false, "sync after each write.");
+
+namespace rocksdb {
+void RunBenchmark() {
+  std::string file_name = test::TmpDir() + "/log_write_bench.log";
+  Env* env = Env::Default();
+  EnvOptions env_options;
+  env_options.use_mmap_writes = false;
+  env_options.bytes_per_sync = FLAGS_bytes_per_sync;
+  unique_ptr<WritableFile> file;
+  env->NewWritableFile(file_name, &file, env_options);
+
+  std::string record;
+  record.assign('X', FLAGS_record_size);
+
+  HistogramImpl hist;
+
+  uint64_t start_time = env->NowMicros();
+  for (int i = 0; i < FLAGS_num_records; i++) {
+    uint64_t start_nanos = env->NowNanos();
+    file->Append(record);
+    file->Flush();
+    if (FLAGS_enable_sync) {
+      file->Sync();
+    }
+    hist.Add(env->NowNanos() - start_nanos);
+
+    if (i % 1000 == 1) {
+      fprintf(stderr, "Wrote %d records...\n", i);
+    }
+
+    int time_to_sleep =
+        (i + 1) * FLAGS_record_interval - (env->NowMicros() - start_time);
+    if (time_to_sleep > 0) {
+      env->SleepForMicroseconds(time_to_sleep);
+    }
+  }
+
+  fprintf(stderr, "Distribution of latency of append+flush: \n%s",
+          hist.ToString().c_str());
+}
+}  // namespace rocksdb
+
+int main(int argc, char** argv) {
+  google::SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) +
+                          " [OPTIONS]...");
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  rocksdb::RunBenchmark();
+  return 0;
+}

From 2bf1151a25de5c711d0c863973c2d995e1137c60 Mon Sep 17 00:00:00 2001
From: Igor Canadi
Date: Mon, 24 Feb 2014 15:15:34 -0800
Subject: [PATCH 13/14] Fix C API

---
 db/c.cc             | 2 +-
 db/c_test.c         | 4 +---
 include/rocksdb/c.h | 5 ++---
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/db/c.cc b/db/c.cc
index 2d4f491889..935a297f10 100644
--- a/db/c.cc
+++ b/db/c.cc
@@ -595,7 +595,7 @@ void rocksdb_options_set_comparator(
   opt->rep.comparator = cmp;
 }
 
-void rocksdb_options_set_merger_operator(
+void rocksdb_options_set_merge_operator(
     rocksdb_options_t* opt,
     rocksdb_mergeoperator_t* merge_operator) {
   opt->rep.merge_operator = std::shared_ptr<MergeOperator>(merge_operator);
diff --git a/db/c_test.c b/db/c_test.c
index f4e5568fac..a68abca489 100644
--- a/db/c_test.c
+++ b/db/c_test.c
@@ -416,7 +416,7 @@ int main(int argc, char** argv) {
     // Create new database
     rocksdb_close(db);
     rocksdb_destroy_db(options, dbname, &err);
-    rocksdb_options_set_merger_operator(options, merge_operator);
+    rocksdb_options_set_merge_operator(options, merge_operator);
     db = rocksdb_open(options, dbname, &err);
     CheckNoError(err);
     rocksdb_put(db, woptions, "foo", 3, "foovalue", 8, &err);
@@ -431,8 +431,6 @@ int main(int argc, char** argv) {
     CheckNoError(err);
     CheckGet(db, roptions, "bar", "fake");
 
-    rocksdb_options_set_merger_operator(options, NULL);
-    rocksdb_mergeoperator_destroy(merge_operator);
   }
 
   StartPhase("cleanup");
diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h
index 2157cb3e03..91efed37fd 100644
--- a/include/rocksdb/c.h
+++ b/include/rocksdb/c.h
@@ -216,9 +216,8 @@ extern void rocksdb_options_destroy(rocksdb_options_t*);
 extern void rocksdb_options_set_comparator(
     rocksdb_options_t*,
     rocksdb_comparator_t*);
-extern void rocksdb_options_set_merger_operator(
-    rocksdb_options_t*,
-    rocksdb_mergeoperator_t*);
+extern void rocksdb_options_set_merge_operator(rocksdb_options_t*,
+                                               rocksdb_mergeoperator_t*);
 extern void rocksdb_options_set_compression_per_level(
     rocksdb_options_t* opt,
     int* level_values,

From 6ed450a58cd0bf9f299b0e279ce762125b79deea Mon Sep 17 00:00:00 2001
From: Igor Canadi
Date: Mon, 24 Feb 2014 16:00:13 -0800
Subject: [PATCH 14/14] DeleteFile should schedule Flush or Compaction

Summary:
More info here: https://github.com/facebook/rocksdb/issues/89

If flush fails because of ENOSPC, we have a deadlock problem. This is a quick
fix that lets normal operation continue once the user deletes the file and
frees up space on the device. We need to address the issue more broadly with
bg_error_ cleanup.

Test Plan: make check

Reviewers: dhruba, haobo, ljin

Reviewed By: ljin

CC: leveldb

Differential Revision: https://reviews.facebook.net/D16275
---
 db/db_impl.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/db/db_impl.cc b/db/db_impl.cc
index 041c621079..12042585b5 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -3623,6 +3623,12 @@ Status DBImpl::DeleteFile(std::string name) {
   LogFlush(options_.info_log);
   // remove files outside the db-lock
  PurgeObsoleteFiles(deletion_state);
+  {
+    MutexLock l(&mutex_);
+    // schedule flush if file deletion means we freed the space for flushes to
+    // continue
+    MaybeScheduleFlushOrCompaction();
+  }
   return status;
 }
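
Taken together, PATCH 09, PATCH 10 and PATCH 13 above define the client-facing C API for custom merge operators: rocksdb_mergeoperator_create() now takes the extra delete_value cleanup callback, and the option setter is named rocksdb_options_set_merge_operator(). The sketch below shows how an application might wire these together. It is illustrative only: the ExampleMo* callbacks, the NULL state pointer and the /tmp database path are invented for the example, while the entry points and callback signatures are the ones shown in db/c.cc, db/c_test.c and include/rocksdb/c.h; the remaining option/read/write-option calls are the standard ones from the C header.

/* Illustrative client of the extended C API: a custom merge operator
 * registered together with the new delete_value cleanup callback. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "rocksdb/c.h"

static void ExampleMoDestroy(void* state) { (void)state; }

static const char* ExampleMoName(void* state) {
  (void)state;
  return "ExampleMergeOperator";
}

/* Both merge callbacks hand back a buffer allocated with the client's own
 * allocator (plain malloc here); they ignore the operands and always return
 * the fixed token "merged", like the fake operator in db/c_test.c. */
static char* ExampleMoFullMerge(
    void* state, const char* key, size_t key_length,
    const char* existing_value, size_t existing_value_length,
    const char* const* operands_list, const size_t* operands_list_length,
    int num_operands, unsigned char* success, size_t* new_value_length) {
  (void)state; (void)key; (void)key_length;
  (void)existing_value; (void)existing_value_length;
  (void)operands_list; (void)operands_list_length; (void)num_operands;
  char* result = (char*)malloc(6);
  memcpy(result, "merged", 6);
  *new_value_length = 6;
  *success = 1;
  return result;
}

static char* ExampleMoPartialMerge(
    void* state, const char* key, size_t key_length,
    const char* left_operand, size_t left_operand_length,
    const char* right_operand, size_t right_operand_length,
    unsigned char* success, size_t* new_value_length) {
  (void)left_operand; (void)left_operand_length;
  (void)right_operand; (void)right_operand_length;
  return ExampleMoFullMerge(state, key, key_length, NULL, 0, NULL, NULL, 0,
                            success, new_value_length);
}

/* The new cleanup hook from PATCH 09: RocksDB calls this instead of free(),
 * so the result buffers above are released by the allocator that created
 * them. */
static void ExampleMoDeleteValue(void* state, const char* value,
                                 size_t value_length) {
  (void)state; (void)value_length;
  free((void*)value);
}

int main(void) {
  char* err = NULL;

  rocksdb_mergeoperator_t* mo = rocksdb_mergeoperator_create(
      NULL, ExampleMoDestroy, ExampleMoFullMerge, ExampleMoPartialMerge,
      ExampleMoDeleteValue, ExampleMoName);

  rocksdb_options_t* options = rocksdb_options_create();
  rocksdb_options_set_create_if_missing(options, 1);
  rocksdb_options_set_merge_operator(options, mo);

  rocksdb_t* db = rocksdb_open(options, "/tmp/merge_operator_example", &err);
  if (err != NULL) {
    fprintf(stderr, "open failed: %s\n", err);
    free(err);
    return 1;
  }

  rocksdb_writeoptions_t* woptions = rocksdb_writeoptions_create();
  rocksdb_merge(db, woptions, "foo", 3, "operand", 7, &err);
  if (err != NULL) {
    fprintf(stderr, "merge failed: %s\n", err);
    free(err);
    err = NULL;
  }

  rocksdb_readoptions_t* roptions = rocksdb_readoptions_create();
  size_t vallen = 0;
  char* val = rocksdb_get(db, roptions, "foo", 3, &vallen, &err);
  if (val != NULL) {
    printf("foo -> %.*s\n", (int)vallen, val);
    free(val);
  }

  rocksdb_readoptions_destroy(roptions);
  rocksdb_writeoptions_destroy(woptions);
  rocksdb_close(db);
  /* The options own the merge operator through a shared_ptr (see
   * rocksdb_options_set_merge_operator in db/c.cc), so it is released when
   * the options are destroyed; calling rocksdb_mergeoperator_destroy here
   * as well would double-free it, which is exactly what PATCH 13 removed
   * from db/c_test.c. */
  rocksdb_options_destroy(options);
  return 0;
}

The delete_value and delete_filter hooks matter when the callbacks allocate results with something other than the C runtime's malloc (a custom arena, or a different heap across a shared-library boundary); without them, db/c.cc falls back to free(), as the default branches in the CreateFilter and merge wrappers above show.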