mirror of
https://github.com/XRPLF/rippled.git
synced 2025-11-18 10:05:51 +00:00
Updates RocksDB to version 9.7.3, the latest version supported in Conan 1.x. A patch for 9.7.4 that fixes a memory leak is included.
320 lines
12 KiB
Diff
320 lines
12 KiB
Diff
diff --git a/HISTORY.md b/HISTORY.md
|
|
index 36d472229..05ad1a202 100644
|
|
--- a/HISTORY.md
|
|
+++ b/HISTORY.md
|
|
@@ -1,6 +1,10 @@
|
|
# Rocksdb Change Log
|
|
> NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt`
|
|
|
|
+## 9.7.4 (10/31/2024)
|
|
+### Bug Fixes
|
|
+* Fix a leak of obsolete blob files left open until DB::Close(). This bug was introduced in version 9.4.0.
|
|
+
|
|
## 9.7.3 (10/16/2024)
|
|
### Behavior Changes
|
|
* OPTIONS file to be loaded by remote worker is now preserved so that it does not get purged by the primary host. A similar technique as how we are preserving new SST files from getting purged is used for this. min_options_file_numbers_ is tracked like pending_outputs_ is tracked.
|
|
diff --git a/db/blob/blob_file_cache.cc b/db/blob/blob_file_cache.cc
|
|
index 5f340aadf..1b9faa238 100644
|
|
--- a/db/blob/blob_file_cache.cc
|
|
+++ b/db/blob/blob_file_cache.cc
|
|
@@ -42,6 +42,7 @@ Status BlobFileCache::GetBlobFileReader(
|
|
assert(blob_file_reader);
|
|
assert(blob_file_reader->IsEmpty());
|
|
|
|
+ // NOTE: sharing same Cache with table_cache
|
|
const Slice key = GetSliceForKey(&blob_file_number);
|
|
|
|
assert(cache_);
|
|
@@ -98,4 +99,13 @@ Status BlobFileCache::GetBlobFileReader(
|
|
return Status::OK();
|
|
}
|
|
|
|
+void BlobFileCache::Evict(uint64_t blob_file_number) {
|
|
+ // NOTE: sharing same Cache with table_cache
|
|
+ const Slice key = GetSliceForKey(&blob_file_number);
|
|
+
|
|
+ assert(cache_);
|
|
+
|
|
+ cache_.get()->Erase(key);
|
|
+}
|
|
+
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
diff --git a/db/blob/blob_file_cache.h b/db/blob/blob_file_cache.h
|
|
index 740e67ada..6858d012b 100644
|
|
--- a/db/blob/blob_file_cache.h
|
|
+++ b/db/blob/blob_file_cache.h
|
|
@@ -36,6 +36,15 @@ class BlobFileCache {
|
|
uint64_t blob_file_number,
|
|
CacheHandleGuard<BlobFileReader>* blob_file_reader);
|
|
|
|
+ // Called when a blob file is obsolete to ensure it is removed from the cache
|
|
+ // to avoid effectively leaking the open file and assicated memory
|
|
+ void Evict(uint64_t blob_file_number);
|
|
+
|
|
+ // Used to identify cache entries for blob files (not normally useful)
|
|
+ static const Cache::CacheItemHelper* GetHelper() {
|
|
+ return CacheInterface::GetBasicHelper();
|
|
+ }
|
|
+
|
|
private:
|
|
using CacheInterface =
|
|
BasicTypedCacheInterface<BlobFileReader, CacheEntryRole::kMisc>;
|
|
diff --git a/db/column_family.h b/db/column_family.h
|
|
index e4b7adde8..86637736a 100644
|
|
--- a/db/column_family.h
|
|
+++ b/db/column_family.h
|
|
@@ -401,6 +401,7 @@ class ColumnFamilyData {
|
|
SequenceNumber earliest_seq);
|
|
|
|
TableCache* table_cache() const { return table_cache_.get(); }
|
|
+ BlobFileCache* blob_file_cache() const { return blob_file_cache_.get(); }
|
|
BlobSource* blob_source() const { return blob_source_.get(); }
|
|
|
|
// See documentation in compaction_picker.h
|
|
diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc
|
|
index 261593423..06573ac2e 100644
|
|
--- a/db/db_impl/db_impl.cc
|
|
+++ b/db/db_impl/db_impl.cc
|
|
@@ -659,8 +659,9 @@ Status DBImpl::CloseHelper() {
|
|
// We need to release them before the block cache is destroyed. The block
|
|
// cache may be destroyed inside versions_.reset(), when column family data
|
|
// list is destroyed, so leaving handles in table cache after
|
|
- // versions_.reset() may cause issues.
|
|
- // Here we clean all unreferenced handles in table cache.
|
|
+ // versions_.reset() may cause issues. Here we clean all unreferenced handles
|
|
+ // in table cache, and (for certain builds/conditions) assert that no obsolete
|
|
+ // files are hanging around unreferenced (leak) in the table/blob file cache.
|
|
// Now we assume all user queries have finished, so only version set itself
|
|
// can possibly hold the blocks from block cache. After releasing unreferenced
|
|
// handles here, only handles held by version set left and inside
|
|
@@ -668,6 +669,9 @@ Status DBImpl::CloseHelper() {
|
|
// time a handle is released, we erase it from the cache too. By doing that,
|
|
// we can guarantee that after versions_.reset(), table cache is empty
|
|
// so the cache can be safely destroyed.
|
|
+#ifndef NDEBUG
|
|
+ TEST_VerifyNoObsoleteFilesCached(/*db_mutex_already_held=*/true);
|
|
+#endif // !NDEBUG
|
|
table_cache_->EraseUnRefEntries();
|
|
|
|
for (auto& txn_entry : recovered_transactions_) {
|
|
@@ -3227,6 +3231,8 @@ Status DBImpl::MultiGetImpl(
|
|
s = Status::Aborted();
|
|
break;
|
|
}
|
|
+ // This could be a long-running operation
|
|
+ ROCKSDB_THREAD_YIELD_HOOK();
|
|
}
|
|
|
|
// Post processing (decrement reference counts and record statistics)
|
|
diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h
|
|
index 5e4fa310b..ccc0abfa7 100644
|
|
--- a/db/db_impl/db_impl.h
|
|
+++ b/db/db_impl/db_impl.h
|
|
@@ -1241,9 +1241,14 @@ class DBImpl : public DB {
|
|
static Status TEST_ValidateOptions(const DBOptions& db_options) {
|
|
return ValidateOptions(db_options);
|
|
}
|
|
-
|
|
#endif // NDEBUG
|
|
|
|
+ // In certain configurations, verify that the table/blob file cache only
|
|
+ // contains entries for live files, to check for effective leaks of open
|
|
+ // files. This can only be called when purging of obsolete files has
|
|
+ // "settled," such as during parts of DB Close().
|
|
+ void TEST_VerifyNoObsoleteFilesCached(bool db_mutex_already_held) const;
|
|
+
|
|
// persist stats to column family "_persistent_stats"
|
|
void PersistStats();
|
|
|
|
diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc
|
|
index 790a50d7a..67f5b4aaf 100644
|
|
--- a/db/db_impl/db_impl_debug.cc
|
|
+++ b/db/db_impl/db_impl_debug.cc
|
|
@@ -9,6 +9,7 @@
|
|
|
|
#ifndef NDEBUG
|
|
|
|
+#include "db/blob/blob_file_cache.h"
|
|
#include "db/column_family.h"
|
|
#include "db/db_impl/db_impl.h"
|
|
#include "db/error_handler.h"
|
|
@@ -328,5 +329,49 @@ size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const {
|
|
InstrumentedMutexLock l(&const_cast<DBImpl*>(this)->stats_history_mutex_);
|
|
return EstimateInMemoryStatsHistorySize();
|
|
}
|
|
+
|
|
+void DBImpl::TEST_VerifyNoObsoleteFilesCached(
|
|
+ bool db_mutex_already_held) const {
|
|
+ // This check is somewhat expensive and obscure to make a part of every
|
|
+ // unit test in every build variety. Thus, we only enable it for ASAN builds.
|
|
+ if (!kMustFreeHeapAllocations) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ std::optional<InstrumentedMutexLock> l;
|
|
+ if (db_mutex_already_held) {
|
|
+ mutex_.AssertHeld();
|
|
+ } else {
|
|
+ l.emplace(&mutex_);
|
|
+ }
|
|
+
|
|
+ std::vector<uint64_t> live_files;
|
|
+ for (auto cfd : *versions_->GetColumnFamilySet()) {
|
|
+ if (cfd->IsDropped()) {
|
|
+ continue;
|
|
+ }
|
|
+ // Sneakily add both SST and blob files to the same list
|
|
+ cfd->current()->AddLiveFiles(&live_files, &live_files);
|
|
+ }
|
|
+ std::sort(live_files.begin(), live_files.end());
|
|
+
|
|
+ auto fn = [&live_files](const Slice& key, Cache::ObjectPtr, size_t,
|
|
+ const Cache::CacheItemHelper* helper) {
|
|
+ if (helper != BlobFileCache::GetHelper()) {
|
|
+ // Skip non-blob files for now
|
|
+ // FIXME: diagnose and fix the leaks of obsolete SST files revealed in
|
|
+ // unit tests.
|
|
+ return;
|
|
+ }
|
|
+ // See TableCache and BlobFileCache
|
|
+ assert(key.size() == sizeof(uint64_t));
|
|
+ uint64_t file_number;
|
|
+ GetUnaligned(reinterpret_cast<const uint64_t*>(key.data()), &file_number);
|
|
+ // Assert file is in sorted live_files
|
|
+ assert(
|
|
+ std::binary_search(live_files.begin(), live_files.end(), file_number));
|
|
+ };
|
|
+ table_cache_->ApplyToAllEntries(fn, {});
|
|
+}
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
#endif // NDEBUG
|
|
diff --git a/db/db_iter.cc b/db/db_iter.cc
|
|
index e02586377..bf4749eb9 100644
|
|
--- a/db/db_iter.cc
|
|
+++ b/db/db_iter.cc
|
|
@@ -540,6 +540,8 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
|
|
} else {
|
|
iter_.Next();
|
|
}
|
|
+ // This could be a long-running operation due to tombstones, etc.
|
|
+ ROCKSDB_THREAD_YIELD_HOOK();
|
|
} while (iter_.Valid());
|
|
|
|
valid_ = false;
|
|
diff --git a/db/table_cache.cc b/db/table_cache.cc
|
|
index 71fc29c32..8a5be75e8 100644
|
|
--- a/db/table_cache.cc
|
|
+++ b/db/table_cache.cc
|
|
@@ -164,6 +164,7 @@ Status TableCache::GetTableReader(
|
|
}
|
|
|
|
Cache::Handle* TableCache::Lookup(Cache* cache, uint64_t file_number) {
|
|
+ // NOTE: sharing same Cache with BlobFileCache
|
|
Slice key = GetSliceForFileNumber(&file_number);
|
|
return cache->Lookup(key);
|
|
}
|
|
@@ -179,6 +180,7 @@ Status TableCache::FindTable(
|
|
size_t max_file_size_for_l0_meta_pin, Temperature file_temperature) {
|
|
PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos, ioptions_.clock);
|
|
uint64_t number = file_meta.fd.GetNumber();
|
|
+ // NOTE: sharing same Cache with BlobFileCache
|
|
Slice key = GetSliceForFileNumber(&number);
|
|
*handle = cache_.Lookup(key);
|
|
TEST_SYNC_POINT_CALLBACK("TableCache::FindTable:0",
|
|
diff --git a/db/version_builder.cc b/db/version_builder.cc
|
|
index ed8ab8214..c98f53f42 100644
|
|
--- a/db/version_builder.cc
|
|
+++ b/db/version_builder.cc
|
|
@@ -24,6 +24,7 @@
|
|
#include <vector>
|
|
|
|
#include "cache/cache_reservation_manager.h"
|
|
+#include "db/blob/blob_file_cache.h"
|
|
#include "db/blob/blob_file_meta.h"
|
|
#include "db/dbformat.h"
|
|
#include "db/internal_stats.h"
|
|
@@ -744,12 +745,9 @@ class VersionBuilder::Rep {
|
|
return Status::Corruption("VersionBuilder", oss.str());
|
|
}
|
|
|
|
- // Note: we use C++11 for now but in C++14, this could be done in a more
|
|
- // elegant way using generalized lambda capture.
|
|
- VersionSet* const vs = version_set_;
|
|
- const ImmutableCFOptions* const ioptions = ioptions_;
|
|
-
|
|
- auto deleter = [vs, ioptions](SharedBlobFileMetaData* shared_meta) {
|
|
+ auto deleter = [vs = version_set_, ioptions = ioptions_,
|
|
+ bc = cfd_ ? cfd_->blob_file_cache()
|
|
+ : nullptr](SharedBlobFileMetaData* shared_meta) {
|
|
if (vs) {
|
|
assert(ioptions);
|
|
assert(!ioptions->cf_paths.empty());
|
|
@@ -758,6 +756,9 @@ class VersionBuilder::Rep {
|
|
vs->AddObsoleteBlobFile(shared_meta->GetBlobFileNumber(),
|
|
ioptions->cf_paths.front().path);
|
|
}
|
|
+ if (bc) {
|
|
+ bc->Evict(shared_meta->GetBlobFileNumber());
|
|
+ }
|
|
|
|
delete shared_meta;
|
|
};
|
|
@@ -766,7 +767,7 @@ class VersionBuilder::Rep {
|
|
blob_file_number, blob_file_addition.GetTotalBlobCount(),
|
|
blob_file_addition.GetTotalBlobBytes(),
|
|
blob_file_addition.GetChecksumMethod(),
|
|
- blob_file_addition.GetChecksumValue(), deleter);
|
|
+ blob_file_addition.GetChecksumValue(), std::move(deleter));
|
|
|
|
mutable_blob_file_metas_.emplace(
|
|
blob_file_number, MutableBlobFileMetaData(std::move(shared_meta)));
|
|
diff --git a/db/version_set.h b/db/version_set.h
|
|
index 9336782b1..024f869e7 100644
|
|
--- a/db/version_set.h
|
|
+++ b/db/version_set.h
|
|
@@ -1514,7 +1514,6 @@ class VersionSet {
|
|
void GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata);
|
|
|
|
void AddObsoleteBlobFile(uint64_t blob_file_number, std::string path) {
|
|
- // TODO: Erase file from BlobFileCache?
|
|
obsolete_blob_files_.emplace_back(blob_file_number, std::move(path));
|
|
}
|
|
|
|
diff --git a/include/rocksdb/version.h b/include/rocksdb/version.h
|
|
index 2a19796b8..0afa2cab1 100644
|
|
--- a/include/rocksdb/version.h
|
|
+++ b/include/rocksdb/version.h
|
|
@@ -13,7 +13,7 @@
|
|
// minor or major version number planned for release.
|
|
#define ROCKSDB_MAJOR 9
|
|
#define ROCKSDB_MINOR 7
|
|
-#define ROCKSDB_PATCH 3
|
|
+#define ROCKSDB_PATCH 4
|
|
|
|
// Do not use these. We made the mistake of declaring macros starting with
|
|
// double underscore. Now we have to live with our choice. We'll deprecate these
|
|
diff --git a/port/port.h b/port/port.h
|
|
index 13aa56d47..141716e5b 100644
|
|
--- a/port/port.h
|
|
+++ b/port/port.h
|
|
@@ -19,3 +19,19 @@
|
|
#elif defined(OS_WIN)
|
|
#include "port/win/port_win.h"
|
|
#endif
|
|
+
|
|
+#ifdef OS_LINUX
|
|
+// A temporary hook into long-running RocksDB threads to support modifying their
|
|
+// priority etc. This should become a public API hook once the requirements
|
|
+// are better understood.
|
|
+extern "C" void RocksDbThreadYield() __attribute__((__weak__));
|
|
+#define ROCKSDB_THREAD_YIELD_HOOK() \
|
|
+ { \
|
|
+ if (RocksDbThreadYield) { \
|
|
+ RocksDbThreadYield(); \
|
|
+ } \
|
|
+ }
|
|
+#else
|
|
+#define ROCKSDB_THREAD_YIELD_HOOK() \
|
|
+ {}
|
|
+#endif
|