perf: Move mutex to the partition level (#5486)

This change introduces two key optimizations:
* Mutex scope reduction: Limits the lock to individual partitions within `TaggedCache`, reducing contention.
* Decoupling: Removes the tight coupling between `LedgerHistory` and `TaggedCache`, improving modularity and testability.

Lock contention analysis based on eBPF showed significant improvements as a result of this change.
This commit is contained in:
Valentin Balaschenko
2025-08-07 22:04:07 +01:00
committed by GitHub
parent 991891625a
commit 94decc753b
8 changed files with 135 additions and 143 deletions

View File

@@ -21,7 +21,6 @@
#define RIPPLE_BASICS_SHAMAP_HASH_H_INCLUDED
#include <xrpl/basics/base_uint.h>
#include <xrpl/basics/partitioned_unordered_map.h>
#include <ostream>

View File

@@ -90,9 +90,6 @@ public:
int
getCacheSize() const;
int
getTrackSize() const;
float
getHitRate();
@@ -170,9 +167,6 @@ public:
bool
retrieve(key_type const& key, T& data);
mutex_type&
peekMutex();
std::vector<key_type>
getKeys() const;
@@ -193,11 +187,14 @@ public:
private:
SharedPointerType
initialFetch(key_type const& key, std::lock_guard<mutex_type> const& l);
initialFetch(key_type const& key);
void
collect_metrics();
Mutex&
lockPartition(key_type const& key) const;
private:
struct Stats
{
@@ -300,8 +297,8 @@ private:
[[maybe_unused]] clock_type::time_point const& now,
typename KeyValueCacheType::map_type& partition,
SweptPointersVector& stuffToSweep,
std::atomic<int>& allRemovals,
std::lock_guard<std::recursive_mutex> const&);
std::atomic<int>& allRemoval,
Mutex& partitionLock);
[[nodiscard]] std::thread
sweepHelper(
@@ -310,14 +307,12 @@ private:
typename KeyOnlyCacheType::map_type& partition,
SweptPointersVector&,
std::atomic<int>& allRemovals,
std::lock_guard<std::recursive_mutex> const&);
Mutex& partitionLock);
beast::Journal m_journal;
clock_type& m_clock;
Stats m_stats;
mutex_type mutable m_mutex;
// Used for logging
std::string m_name;
@@ -328,10 +323,11 @@ private:
clock_type::duration const m_target_age;
// Number of items cached
int m_cache_count;
std::atomic<int> m_cache_count;
cache_type m_cache; // Hold strong reference to recent objects
std::uint64_t m_hits;
std::uint64_t m_misses;
std::atomic<std::uint64_t> m_hits;
std::atomic<std::uint64_t> m_misses;
mutable std::vector<mutex_type> partitionLocks_;
};
} // namespace ripple

View File

@@ -22,6 +22,7 @@
#include <xrpl/basics/IntrusivePointer.ipp>
#include <xrpl/basics/TaggedCache.h>
#include <xrpl/beast/core/CurrentThreadName.h>
namespace ripple {
@@ -60,6 +61,7 @@ inline TaggedCache<
, m_hits(0)
, m_misses(0)
{
partitionLocks_ = std::vector<mutex_type>(m_cache.partitions());
}
template <
@@ -105,8 +107,13 @@ TaggedCache<
KeyEqual,
Mutex>::size() const
{
std::lock_guard lock(m_mutex);
return m_cache.size();
std::size_t totalSize = 0;
for (size_t i = 0; i < partitionLocks_.size(); ++i)
{
std::lock_guard<Mutex> lock(partitionLocks_[i]);
totalSize += m_cache.map()[i].size();
}
return totalSize;
}
template <
@@ -129,32 +136,7 @@ TaggedCache<
KeyEqual,
Mutex>::getCacheSize() const
{
std::lock_guard lock(m_mutex);
return m_cache_count;
}
template <
class Key,
class T,
bool IsKeyCache,
class SharedWeakUnionPointer,
class SharedPointerType,
class Hash,
class KeyEqual,
class Mutex>
inline int
TaggedCache<
Key,
T,
IsKeyCache,
SharedWeakUnionPointer,
SharedPointerType,
Hash,
KeyEqual,
Mutex>::getTrackSize() const
{
std::lock_guard lock(m_mutex);
return m_cache.size();
return m_cache_count.load(std::memory_order_relaxed);
}
template <
@@ -177,9 +159,10 @@ TaggedCache<
KeyEqual,
Mutex>::getHitRate()
{
std::lock_guard lock(m_mutex);
auto const total = static_cast<float>(m_hits + m_misses);
return m_hits * (100.0f / std::max(1.0f, total));
auto const hits = m_hits.load(std::memory_order_relaxed);
auto const misses = m_misses.load(std::memory_order_relaxed);
float const total = float(hits + misses);
return hits * (100.0f / std::max(1.0f, total));
}
template <
@@ -202,9 +185,12 @@ TaggedCache<
KeyEqual,
Mutex>::clear()
{
std::lock_guard lock(m_mutex);
for (auto& mutex : partitionLocks_)
mutex.lock();
m_cache.clear();
m_cache_count = 0;
for (auto& mutex : partitionLocks_)
mutex.unlock();
m_cache_count.store(0, std::memory_order_relaxed);
}
template <
@@ -227,11 +213,9 @@ TaggedCache<
KeyEqual,
Mutex>::reset()
{
std::lock_guard lock(m_mutex);
m_cache.clear();
m_cache_count = 0;
m_hits = 0;
m_misses = 0;
clear();
m_hits.store(0, std::memory_order_relaxed);
m_misses.store(0, std::memory_order_relaxed);
}
template <
@@ -255,7 +239,7 @@ TaggedCache<
KeyEqual,
Mutex>::touch_if_exists(KeyComparable const& key)
{
std::lock_guard lock(m_mutex);
std::lock_guard<Mutex> lock(lockPartition(key));
auto const iter(m_cache.find(key));
if (iter == m_cache.end())
{
@@ -297,8 +281,6 @@ TaggedCache<
auto const start = std::chrono::steady_clock::now();
{
std::lock_guard lock(m_mutex);
if (m_target_size == 0 ||
(static_cast<int>(m_cache.size()) <= m_target_size))
{
@@ -330,12 +312,13 @@ TaggedCache<
m_cache.map()[p],
allStuffToSweep[p],
allRemovals,
lock));
partitionLocks_[p]));
}
for (std::thread& worker : workers)
worker.join();
m_cache_count -= allRemovals;
int removals = allRemovals.load(std::memory_order_relaxed);
m_cache_count.fetch_sub(removals, std::memory_order_relaxed);
}
// At this point allStuffToSweep will go out of scope outside the lock
// and decrement the reference count on each strong pointer.
@@ -369,7 +352,8 @@ TaggedCache<
{
// Remove from cache, if !valid, remove from map too. Returns true if
// removed from cache
std::lock_guard lock(m_mutex);
std::lock_guard<Mutex> lock(lockPartition(key));
auto cit = m_cache.find(key);
@@ -382,7 +366,7 @@ TaggedCache<
if (entry.isCached())
{
--m_cache_count;
m_cache_count.fetch_sub(1, std::memory_order_relaxed);
entry.ptr.convertToWeak();
ret = true;
}
@@ -420,17 +404,16 @@ TaggedCache<
{
// Return canonical value, store if needed, refresh in cache
// Return values: true=we had the data already
std::lock_guard lock(m_mutex);
std::lock_guard<Mutex> lock(lockPartition(key));
auto cit = m_cache.find(key);
if (cit == m_cache.end())
{
m_cache.emplace(
std::piecewise_construct,
std::forward_as_tuple(key),
std::forward_as_tuple(m_clock.now(), data));
++m_cache_count;
m_cache_count.fetch_add(1, std::memory_order_relaxed);
return false;
}
@@ -479,12 +462,12 @@ TaggedCache<
data = cachedData;
}
++m_cache_count;
m_cache_count.fetch_add(1, std::memory_order_relaxed);
return true;
}
entry.ptr = data;
++m_cache_count;
m_cache_count.fetch_add(1, std::memory_order_relaxed);
return false;
}
@@ -560,10 +543,11 @@ TaggedCache<
KeyEqual,
Mutex>::fetch(key_type const& key)
{
std::lock_guard<mutex_type> l(m_mutex);
auto ret = initialFetch(key, l);
std::lock_guard<Mutex> lock(lockPartition(key));
auto ret = initialFetch(key);
if (!ret)
++m_misses;
m_misses.fetch_add(1, std::memory_order_relaxed);
return ret;
}
@@ -627,8 +611,8 @@ TaggedCache<
Mutex>::insert(key_type const& key)
-> std::enable_if_t<IsKeyCache, ReturnType>
{
std::lock_guard lock(m_mutex);
clock_type::time_point const now(m_clock.now());
std::lock_guard<Mutex> lock(lockPartition(key));
auto [it, inserted] = m_cache.emplace(
std::piecewise_construct,
std::forward_as_tuple(key),
@@ -668,29 +652,6 @@ TaggedCache<
return true;
}
template <
class Key,
class T,
bool IsKeyCache,
class SharedWeakUnionPointer,
class SharedPointerType,
class Hash,
class KeyEqual,
class Mutex>
inline auto
TaggedCache<
Key,
T,
IsKeyCache,
SharedWeakUnionPointer,
SharedPointerType,
Hash,
KeyEqual,
Mutex>::peekMutex() -> mutex_type&
{
return m_mutex;
}
template <
class Key,
class T,
@@ -714,10 +675,13 @@ TaggedCache<
std::vector<key_type> v;
{
std::lock_guard lock(m_mutex);
v.reserve(m_cache.size());
for (auto const& _ : m_cache)
v.push_back(_.first);
for (std::size_t i = 0; i < partitionLocks_.size(); ++i)
{
std::lock_guard<Mutex> lock(partitionLocks_[i]);
for (auto const& entry : m_cache.map()[i])
v.push_back(entry.first);
}
}
return v;
@@ -743,11 +707,12 @@ TaggedCache<
KeyEqual,
Mutex>::rate() const
{
std::lock_guard lock(m_mutex);
auto const tot = m_hits + m_misses;
auto const hits = m_hits.load(std::memory_order_relaxed);
auto const misses = m_misses.load(std::memory_order_relaxed);
auto const tot = hits + misses;
if (tot == 0)
return 0;
return double(m_hits) / tot;
return 0.0;
return double(hits) / tot;
}
template <
@@ -771,18 +736,16 @@ TaggedCache<
KeyEqual,
Mutex>::fetch(key_type const& digest, Handler const& h)
{
{
std::lock_guard l(m_mutex);
if (auto ret = initialFetch(digest, l))
std::lock_guard<Mutex> lock(lockPartition(digest));
if (auto ret = initialFetch(digest))
return ret;
}
auto sle = h();
if (!sle)
return {};
std::lock_guard l(m_mutex);
++m_misses;
m_misses.fetch_add(1, std::memory_order_relaxed);
auto const [it, inserted] =
m_cache.emplace(digest, Entry(m_clock.now(), std::move(sle)));
if (!inserted)
@@ -809,9 +772,10 @@ TaggedCache<
SharedPointerType,
Hash,
KeyEqual,
Mutex>::
initialFetch(key_type const& key, std::lock_guard<mutex_type> const& l)
Mutex>::initialFetch(key_type const& key)
{
std::lock_guard<Mutex> lock(lockPartition(key));
auto cit = m_cache.find(key);
if (cit == m_cache.end())
return {};
@@ -819,7 +783,7 @@ TaggedCache<
Entry& entry = cit->second;
if (entry.isCached())
{
++m_hits;
m_hits.fetch_add(1, std::memory_order_relaxed);
entry.touch(m_clock.now());
return entry.ptr.getStrong();
}
@@ -827,12 +791,13 @@ TaggedCache<
if (entry.isCached())
{
// independent of cache size, so not counted as a hit
++m_cache_count;
m_cache_count.fetch_add(1, std::memory_order_relaxed);
entry.touch(m_clock.now());
return entry.ptr.getStrong();
}
m_cache.erase(cit);
return {};
}
@@ -861,10 +826,11 @@ TaggedCache<
{
beast::insight::Gauge::value_type hit_rate(0);
{
std::lock_guard lock(m_mutex);
auto const total(m_hits + m_misses);
auto const hits = m_hits.load(std::memory_order_relaxed);
auto const misses = m_misses.load(std::memory_order_relaxed);
auto const total = hits + misses;
if (total != 0)
hit_rate = (m_hits * 100) / total;
hit_rate = (hits * 100) / total;
}
m_stats.hit_rate.set(hit_rate);
}
@@ -895,12 +861,16 @@ TaggedCache<
typename KeyValueCacheType::map_type& partition,
SweptPointersVector& stuffToSweep,
std::atomic<int>& allRemovals,
std::lock_guard<std::recursive_mutex> const&)
Mutex& partitionLock)
{
return std::thread([&, this]() {
beast::setCurrentThreadName("sweep-KVCache");
int cacheRemovals = 0;
int mapRemovals = 0;
std::lock_guard<Mutex> lock(partitionLock);
// Keep references to all the stuff we sweep
// so that we can destroy them outside the lock.
stuffToSweep.reserve(partition.size());
@@ -984,12 +954,16 @@ TaggedCache<
typename KeyOnlyCacheType::map_type& partition,
SweptPointersVector&,
std::atomic<int>& allRemovals,
std::lock_guard<std::recursive_mutex> const&)
Mutex& partitionLock)
{
return std::thread([&, this]() {
beast::setCurrentThreadName("sweep-KCache");
int cacheRemovals = 0;
int mapRemovals = 0;
std::lock_guard<Mutex> lock(partitionLock);
// Keep references to all the stuff we sweep
// so that we can destroy them outside the lock.
{
@@ -1024,6 +998,29 @@ TaggedCache<
});
}
template <
class Key,
class T,
bool IsKeyCache,
class SharedWeakUnionPointer,
class SharedPointerType,
class Hash,
class KeyEqual,
class Mutex>
inline Mutex&
TaggedCache<
Key,
T,
IsKeyCache,
SharedWeakUnionPointer,
SharedPointerType,
Hash,
KeyEqual,
Mutex>::lockPartition(key_type const& key) const
{
return partitionLocks_[m_cache.partition_index(key)];
}
} // namespace ripple
#endif

View File

@@ -277,6 +277,12 @@ public:
return map_;
}
partition_map_type const&
map() const
{
return map_;
}
iterator
begin()
{
@@ -321,6 +327,12 @@ public:
return cend();
}
std::size_t
partition_index(key_type const& key) const
{
return partitioner(key);
}
private:
template <class T>
void

View File

@@ -22,7 +22,6 @@
#include <xrpl/basics/ByteUtilities.h>
#include <xrpl/basics/base_uint.h>
#include <xrpl/basics/partitioned_unordered_map.h>
#include <cstdint>

View File

@@ -58,10 +58,10 @@ public:
// Insert an item, retrieve it, and age it so it gets purged.
{
BEAST_EXPECT(c.getCacheSize() == 0);
BEAST_EXPECT(c.getTrackSize() == 0);
BEAST_EXPECT(c.size() == 0);
BEAST_EXPECT(!c.insert(1, "one"));
BEAST_EXPECT(c.getCacheSize() == 1);
BEAST_EXPECT(c.getTrackSize() == 1);
BEAST_EXPECT(c.size() == 1);
{
std::string s;
@@ -72,7 +72,7 @@ public:
++clock;
c.sweep();
BEAST_EXPECT(c.getCacheSize() == 0);
BEAST_EXPECT(c.getTrackSize() == 0);
BEAST_EXPECT(c.size() == 0);
}
// Insert an item, maintain a strong pointer, age it, and
@@ -80,7 +80,7 @@ public:
{
BEAST_EXPECT(!c.insert(2, "two"));
BEAST_EXPECT(c.getCacheSize() == 1);
BEAST_EXPECT(c.getTrackSize() == 1);
BEAST_EXPECT(c.size() == 1);
{
auto p = c.fetch(2);
@@ -88,14 +88,14 @@ public:
++clock;
c.sweep();
BEAST_EXPECT(c.getCacheSize() == 0);
BEAST_EXPECT(c.getTrackSize() == 1);
BEAST_EXPECT(c.size() == 1);
}
// Make sure its gone now that our reference is gone
++clock;
c.sweep();
BEAST_EXPECT(c.getCacheSize() == 0);
BEAST_EXPECT(c.getTrackSize() == 0);
BEAST_EXPECT(c.size() == 0);
}
// Insert the same key/value pair and make sure we get the same result
@@ -111,7 +111,7 @@ public:
++clock;
c.sweep();
BEAST_EXPECT(c.getCacheSize() == 0);
BEAST_EXPECT(c.getTrackSize() == 0);
BEAST_EXPECT(c.size() == 0);
}
// Put an object in but keep a strong pointer to it, advance the clock a
@@ -121,24 +121,24 @@ public:
// Put an object in
BEAST_EXPECT(!c.insert(4, "four"));
BEAST_EXPECT(c.getCacheSize() == 1);
BEAST_EXPECT(c.getTrackSize() == 1);
BEAST_EXPECT(c.size() == 1);
{
// Keep a strong pointer to it
auto const p1 = c.fetch(4);
BEAST_EXPECT(p1 != nullptr);
BEAST_EXPECT(c.getCacheSize() == 1);
BEAST_EXPECT(c.getTrackSize() == 1);
BEAST_EXPECT(c.size() == 1);
// Advance the clock a lot
++clock;
c.sweep();
BEAST_EXPECT(c.getCacheSize() == 0);
BEAST_EXPECT(c.getTrackSize() == 1);
BEAST_EXPECT(c.size() == 1);
// Canonicalize a new object with the same key
auto p2 = std::make_shared<std::string>("four");
BEAST_EXPECT(c.canonicalize_replace_client(4, p2));
BEAST_EXPECT(c.getCacheSize() == 1);
BEAST_EXPECT(c.getTrackSize() == 1);
BEAST_EXPECT(c.size() == 1);
// Make sure we get the original object
BEAST_EXPECT(p1.get() == p2.get());
}
@@ -146,7 +146,7 @@ public:
++clock;
c.sweep();
BEAST_EXPECT(c.getCacheSize() == 0);
BEAST_EXPECT(c.getTrackSize() == 0);
BEAST_EXPECT(c.size() == 0);
}
}
};

View File

@@ -63,8 +63,6 @@ LedgerHistory::insert(
ledger->stateMap().getHash().isNonZero(),
"ripple::LedgerHistory::insert : nonzero hash");
std::unique_lock sl(m_ledgers_by_hash.peekMutex());
bool const alreadyHad = m_ledgers_by_hash.canonicalize_replace_cache(
ledger->info().hash, ledger);
if (validated)
@@ -76,7 +74,6 @@ LedgerHistory::insert(
LedgerHash
LedgerHistory::getLedgerHash(LedgerIndex index)
{
std::unique_lock sl(m_ledgers_by_hash.peekMutex());
if (auto it = mLedgersByIndex.find(index); it != mLedgersByIndex.end())
return it->second;
return {};
@@ -86,13 +83,11 @@ std::shared_ptr<Ledger const>
LedgerHistory::getLedgerBySeq(LedgerIndex index)
{
{
std::unique_lock sl(m_ledgers_by_hash.peekMutex());
auto it = mLedgersByIndex.find(index);
if (it != mLedgersByIndex.end())
{
uint256 hash = it->second;
sl.unlock();
return getLedgerByHash(hash);
}
}
@@ -108,7 +103,6 @@ LedgerHistory::getLedgerBySeq(LedgerIndex index)
{
// Add this ledger to the local tracking by index
std::unique_lock sl(m_ledgers_by_hash.peekMutex());
XRPL_ASSERT(
ret->isImmutable(),
@@ -458,8 +452,6 @@ LedgerHistory::builtLedger(
XRPL_ASSERT(
!hash.isZero(), "ripple::LedgerHistory::builtLedger : nonzero hash");
std::unique_lock sl(m_consensus_validated.peekMutex());
auto entry = std::make_shared<cv_entry>();
m_consensus_validated.canonicalize_replace_client(index, entry);
@@ -500,8 +492,6 @@ LedgerHistory::validatedLedger(
!hash.isZero(),
"ripple::LedgerHistory::validatedLedger : nonzero hash");
std::unique_lock sl(m_consensus_validated.peekMutex());
auto entry = std::make_shared<cv_entry>();
m_consensus_validated.canonicalize_replace_client(index, entry);
@@ -535,10 +525,9 @@ LedgerHistory::validatedLedger(
bool
LedgerHistory::fixIndex(LedgerIndex ledgerIndex, LedgerHash const& ledgerHash)
{
std::unique_lock sl(m_ledgers_by_hash.peekMutex());
auto ledger = m_ledgers_by_hash.fetch(ledgerHash);
auto it = mLedgersByIndex.find(ledgerIndex);
if ((it != mLedgersByIndex.end()) && (it->second != ledgerHash))
if (ledger && (it != mLedgersByIndex.end()) && (it->second != ledgerHash))
{
it->second = ledgerHash;
return false;

View File

@@ -114,7 +114,7 @@ getCountsJson(Application& app, int minObjectCount)
ret[jss::treenode_cache_size] =
app.getNodeFamily().getTreeNodeCache()->getCacheSize();
ret[jss::treenode_track_size] =
app.getNodeFamily().getTreeNodeCache()->getTrackSize();
static_cast<int>(app.getNodeFamily().getTreeNodeCache()->size());
std::string uptime;
auto s = UptimeClock::now();