From 0b4ccf765c8f3f72d8bd2f34977aeb0eba89b357 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Thu, 6 Feb 2014 12:59:16 -0800 Subject: [PATCH] Flushes should always go to HIGH priority thread pool Summary: This is not column-family related diff. It is in columnfamily branch because the change is significant and we want to push it with next major release (3.0). It removes the leveldb notion of one thread pool and expands it to two thread pools by default (HIGH and LOW). Flush process is removed from compaction process and all flush threads are executed on HIGH thread pool, since we don't want long-running compactions to influence flush latency. Test Plan: make check Reviewers: dhruba, haobo, kailiu, sdong CC: leveldb Differential Revision: https://reviews.facebook.net/D15987 --- HISTORY.md | 7 +++- db/db_impl.cc | 68 +++++++++------------------------------ include/rocksdb/options.h | 14 ++++++-- util/options.cc | 2 +- 4 files changed, 34 insertions(+), 57 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 1c85093373..f7fac7b0dd 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,11 @@ # Rocksdb Change Log -## Unreleased +## Unreleased (will be released in 3.0) +* By default, max_background_flushes is 1 and flush process is + removed from background compaction process. Flush process is now always + executed in high priority thread pool. 
+ +## Unreleased (will be released in 2.8) * By default, checksums are verified on every read from database diff --git a/db/db_impl.cc b/db/db_impl.cc index d346d915ba..91e327a8b1 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -142,6 +142,9 @@ Options SanitizeOptions(const std::string& dbname, DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) { DBOptions result = src; ClipToRange(&result.max_open_files, 20, 1000000); + if (result.max_background_flushes == 0) { + result.max_background_flushes = 1; + } if (result.info_log == nullptr) { Status s = CreateLoggerFromOptions(dbname, result.db_log_dir, src.env, @@ -1704,11 +1707,15 @@ void DBImpl::MaybeScheduleFlushOrCompaction() { is_flush_pending = true; } } - if (is_flush_pending && - (bg_flush_scheduled_ < options_.max_background_flushes)) { + if (is_flush_pending) { // memtable flush needed - bg_flush_scheduled_++; - env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH); + // max_background_flushes should not be 0, because that means + // flush will never get executed + assert(options_.max_background_flushes != 0); + if (bg_flush_scheduled_ < options_.max_background_flushes) { + bg_flush_scheduled_++; + env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH); + } } bool is_compaction_needed = false; for (auto cfd : *versions_->GetColumnFamilySet()) { @@ -1718,12 +1725,10 @@ void DBImpl::MaybeScheduleFlushOrCompaction() { } } - // Schedule BGWorkCompaction if there's a compaction pending (or a memtable - // flush, but the HIGH pool is not enabled). Do it only if - // max_background_compactions hasn't been reached and, in case + // Schedule BGWorkCompaction if there's a compaction pending + // Do it only if max_background_compactions hasn't been reached and, in case // bg_manual_only_ > 0, if it's a manual compaction. 
- if ((manual_compaction_ || is_compaction_needed || - (is_flush_pending && (options_.max_background_flushes <= 0))) && + if ((manual_compaction_ || is_compaction_needed) && bg_compaction_scheduled_ < options_.max_background_compactions && (!bg_manual_only_ || manual_compaction_)) { @@ -1868,41 +1873,14 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, *madeProgress = false; mutex_.AssertHeld(); + unique_ptr c; bool is_manual = (manual_compaction_ != nullptr) && (manual_compaction_->in_progress == false); - if (is_manual) { - // another thread cannot pick up the same work - manual_compaction_->in_progress = true; - } - - // TODO: remove memtable flush from formal compaction - for (auto cfd : *versions_->GetColumnFamilySet()) { - while (cfd->imm()->IsFlushPending()) { - Log(options_.info_log, - "BackgroundCompaction doing FlushMemTableToOutputFile with column " - "family %d, compaction slots available %d", - cfd->GetID(), - options_.max_background_compactions - bg_compaction_scheduled_); - Status stat = - FlushMemTableToOutputFile(cfd, madeProgress, deletion_state); - if (!stat.ok()) { - if (is_manual) { - manual_compaction_->status = stat; - manual_compaction_->done = true; - manual_compaction_->in_progress = false; - manual_compaction_ = nullptr; - } - return stat; - } - } - } - - unique_ptr c; InternalKey manual_end_storage; InternalKey* manual_end = &manual_end_storage; if (is_manual) { ManualCompaction* m = manual_compaction_; - assert(m->in_progress); + m->in_progress = true; c.reset(m->cfd->CompactRange(m->input_level, m->output_level, m->begin, m->end, &manual_end)); if (!c) { @@ -2299,20 +2277,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact, } for (; input->Valid() && !shutting_down_.Acquire_Load(); ) { - // Prioritize immutable compaction work - // TODO: remove memtable flush from normal compaction work - if (cfd->imm()->imm_flush_needed.NoBarrier_Load() != nullptr) { - const uint64_t imm_start = env_->NowMicros(); - 
LogFlush(options_.info_log); - mutex_.Lock(); - if (cfd->imm()->IsFlushPending()) { - FlushMemTableToOutputFile(cfd, nullptr, deletion_state); - bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary - } - mutex_.Unlock(); - imm_micros += (env_->NowMicros() - imm_start); - } - Slice key = input->key(); Slice value = input->value(); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index e7994d798a..47ee930e83 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -526,13 +526,17 @@ struct DBOptions { // regardless of this setting uint64_t delete_obsolete_files_period_micros; - // Maximum number of concurrent background jobs, submitted to - // the default LOW priority thread pool + // Maximum number of concurrent background compaction jobs, submitted to + // the default LOW priority thread pool. + // If you're increasing this, also consider increasing number of threads in + // LOW priority thread pool. For more information, see + // Env::SetBackgroundThreads // Default: 1 int max_background_compactions; // Maximum number of concurrent background memtable flush jobs, submitted to // the HIGH priority thread pool. + // // By default, all background jobs (major compaction and memtable flush) go // to the LOW priority pool. If this option is set to a positive number, // memtable flush jobs will be submitted to the HIGH priority pool. @@ -540,7 +544,11 @@ struct DBOptions { // Without a separate pool, long running major compaction jobs could // potentially block memtable flush jobs of other db instances, leading to // unnecessary Put stalls. - // Default: 0 + // + // If you're increasing this, also consider increasing number of threads in + // HIGH priority thread pool. For more information, see + // Env::SetBackgroundThreads + // Default: 1 int max_background_flushes; // Specify the maximal size of the info log file. 
If the log file diff --git a/util/options.cc b/util/options.cc index 50d1e850e1..212dc46537 100644 --- a/util/options.cc +++ b/util/options.cc @@ -150,7 +150,7 @@ DBOptions::DBOptions() wal_dir(""), delete_obsolete_files_period_micros(6 * 60 * 60 * 1000000UL), max_background_compactions(1), - max_background_flushes(0), + max_background_flushes(1), max_log_file_size(0), log_file_time_to_roll(0), keep_log_file_num(1000),