Eliminate SHAMapInnerNode lock contention:

The `SHAMapInnerNode` class had a single global mutex protecting
every node's array of children. Profiling suggested that around 4%
of all attempts to acquire that mutex would block.

This commit removes the global mutex and replaces it with a new
per-node 16-way spinlock (implemented so as not to affect the size
of an inner node object), effectively eliminating the lock
contention.
Author: Nik Bougalis
Date:   2021-11-20 00:02:09 -08:00
Commit: 1b9387eddc
Parent: 34ca457132

4 changed files with 103 additions and 16 deletions
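The heart of the change is packing sixteen one-bit spinlocks into a single
`std::atomic<std::uint16_t>`, so that threads touching different child
branches (or different nodes) never contend. Before reading the diff, here
is a minimal, self-contained sketch of that idea; it is illustrative only,
not the committed code (the real class, `SpinBitlock`, appears in the diff
below):

    #include <atomic>
    #include <cassert>
    #include <cstdint>
    #include <thread>

    // Sixteen one-bit spinlocks packed into one 16-bit atomic: locking
    // bit `i` contends only with other users of bit `i`.
    class BitSpinlock
    {
        std::atomic<std::uint16_t>& bits_;
        std::uint16_t const mask_;

    public:
        BitSpinlock(std::atomic<std::uint16_t>& bits, int index)
            : bits_(bits), mask_(static_cast<std::uint16_t>(1u << index))
        {
            assert(index >= 0 && index < 16);
        }

        void
        lock()
        {
            // Loop until fetch_or observes our bit clear, meaning that
            // this thread is the one that set it.
            while (bits_.fetch_or(mask_, std::memory_order_acquire) & mask_)
                std::this_thread::yield();
        }

        void
        unlock()
        {
            bits_.fetch_and(
                static_cast<std::uint16_t>(~mask_),
                std::memory_order_release);
        }
    };

    int main()
    {
        std::atomic<std::uint16_t> locks{0};
        long counts[16] = {};

        // Threads working on different "branches" proceed in parallel.
        std::thread t1([&] {
            for (int i = 0; i < 100000; ++i)
            {
                BitSpinlock l(locks, 3);
                l.lock();
                ++counts[3];
                l.unlock();
            }
        });
        std::thread t2([&] {
            for (int i = 0; i < 100000; ++i)
            {
                BitSpinlock l(locks, 7);
                l.lock();
                ++counts[7];
                l.unlock();
            }
        });

        t1.join();
        t2.join();
        assert(counts[3] == 100000 && counts[7] == 100000);
    }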

src/ripple/shamap/SHAMapInnerNode.h

@@ -27,8 +27,10 @@
 #include <ripple/shamap/SHAMapTreeNode.h>
 #include <ripple/shamap/impl/TaggedPointer.h>
+#include <atomic>
 #include <bitset>
 #include <cstdint>
+#include <limits>
 #include <memory>
 #include <mutex>
 #include <optional>
@@ -53,7 +55,8 @@ private:
     std::uint32_t fullBelowGen_ = 0;
     std::uint16_t isBranch_ = 0;

-    static std::mutex childLock;
+    /** A bitlock for the children of this node, with one bit per child */
+    mutable std::atomic<std::uint16_t> lock_ = 0;

    /** Convert arrays stored in `hashesAndChildren_` so they can store the
        requested number of children.
@@ -155,7 +158,7 @@ public:
    std::shared_ptr<SHAMapTreeNode>
    getChild(int branch);

-    virtual std::shared_ptr<SHAMapTreeNode>
+    std::shared_ptr<SHAMapTreeNode>
    canonicalizeChild(int branch, std::shared_ptr<SHAMapTreeNode> node);

    // sync functions
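Why the new per-node lock costs no extra memory: the two-byte atomic can
slot into alignment padding that the existing members already leave behind.
A simplified sketch with hypothetical stand-in fields (the real
SHAMapInnerNode has more members; this only illustrates the padding trick):

    #include <atomic>
    #include <cstdint>

    struct Before
    {
        void* hashesAndChildren;     // pointer: 8 bytes on a 64-bit target
        std::uint32_t fullBelowGen;  // 4 bytes
        std::uint16_t isBranch;      // 2 bytes; 2 bytes of tail padding
    };

    struct After
    {
        void* hashesAndChildren;
        std::uint32_t fullBelowGen;
        std::uint16_t isBranch;
        std::atomic<std::uint16_t> lock{0};  // occupies the former padding
    };

    // On common ABIs (where atomic<uint16_t> is 2 bytes, 2-aligned) the
    // lock is free: both structs are 16 bytes on a typical 64-bit target.
    static_assert(sizeof(Before) == sizeof(After), "lock fits in padding");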

src/ripple/shamap/impl/SHAMapInnerNode.cpp

@@ -19,11 +19,9 @@
 #include <ripple/shamap/SHAMapInnerNode.h>

-#include <ripple/basics/ByteUtilities.h>
 #include <ripple/basics/Log.h>
 #include <ripple/basics/Slice.h>
 #include <ripple/basics/contract.h>
-#include <ripple/basics/safe_cast.h>
 #include <ripple/beast/core/LexicalCast.h>
 #include <ripple/protocol/HashPrefix.h>
 #include <ripple/protocol/digest.h>
@@ -33,14 +31,88 @@
 #include <openssl/sha.h>

 #include <algorithm>
-#include <array>
 #include <iterator>
-#include <mutex>
 #include <utility>

+// This is used for the _mm_pause instruction:
+#include <immintrin.h>
+// This is used for std::this_thread::yield():
+#include <thread>
+
 namespace ripple {

-std::mutex SHAMapInnerNode::childLock;
+/** A specialized 16-way spinlock used to protect inner node branches.
+
+    This class packs 16 separate spinlocks into a single 16-bit value,
+    making it possible to lock any one of them individually or,
+    alternatively, to lock all of them together.
+
+    The implementation tries to use portable constructs, but has to be
+    low-level for performance.
+*/
+class SpinBitlock
+{
+private:
+    std::atomic<std::uint16_t>& bits_;
+    std::uint16_t mask_;
+
+public:
+    SpinBitlock(std::atomic<std::uint16_t>& lock) : bits_(lock), mask_(0xFFFF)
+    {
+    }
+
+    SpinBitlock(std::atomic<std::uint16_t>& lock, int index)
+        : bits_(lock), mask_(1 << index)
+    {
+        assert(index >= 0 && index < 16);
+    }
+
+    [[nodiscard]] bool
+    try_lock()
+    {
+        // If we want to grab all the individual bitlocks at once we cannot
+        // use `fetch_or`! To see why, imagine that `lock_ == 0x0020`: the
+        // `fetch_or` would return `0x0020`, correctly reporting that the
+        // acquisition failed, but it would also have (incorrectly!) set
+        // every other bit, leaving the lock in a corrupt state.
+        std::uint16_t expected = 0;
+
+        if (mask_ != 0xFFFF)
+            return (bits_.fetch_or(mask_, std::memory_order_acquire) &
+                    mask_) == expected;
+
+        return bits_.compare_exchange_weak(
+            expected,
+            mask_,
+            std::memory_order_acquire,
+            std::memory_order_relaxed);
+    }
+
+    void
+    lock()
+    {
+        // Testing suggests that 99.9999% of the time this will succeed on
+        // the first try, so we optimize the fast path:
+        if (try_lock())
+            return;
+
+        // We must not return until the lock is actually held:
+        while (true)
+        {
+            // We try to spin for a few times:
+            for (int i = 0; i != 100; ++i)
+            {
+                if (try_lock())
+                    return;
+
+                _mm_pause();
+            }
+
+            std::this_thread::yield();
+        }
+    }
+
+    void
+    unlock()
+    {
+        bits_.fetch_and(~mask_, std::memory_order_release);
+    }
+};
+
 SHAMapInnerNode::SHAMapInnerNode(
     std::uint32_t cowid,
@@ -108,7 +180,10 @@ SHAMapInnerNode::clone(std::uint32_t cowid) const
             cloneHashes[branchNum] = thisHashes[indexNum];
         });
     }
-    std::lock_guard lock(childLock);
+
+    SpinBitlock sl(lock_);
+    std::lock_guard lock(sl);
+
     if (thisIsSparse)
     {
         int cloneChildIndex = 0;
@@ -341,8 +416,11 @@ SHAMapInnerNode::getChildPointer(int branch)
     assert(branch >= 0 && branch < branchFactor);
     assert(!isEmptyBranch(branch));

-    std::lock_guard lock(childLock);
-    return hashesAndChildren_.getChildren()[*getChildIndex(branch)].get();
+    auto const index = *getChildIndex(branch);
+
+    SpinBitlock sl(lock_, index);
+    std::lock_guard lock(sl);
+    return hashesAndChildren_.getChildren()[index].get();
 }

 std::shared_ptr<SHAMapTreeNode>
@@ -351,8 +429,11 @@ SHAMapInnerNode::getChild(int branch)
     assert(branch >= 0 && branch < branchFactor);
     assert(!isEmptyBranch(branch));

-    std::lock_guard lock(childLock);
-    return hashesAndChildren_.getChildren()[*getChildIndex(branch)];
+    auto const index = *getChildIndex(branch);
+
+    SpinBitlock sl(lock_, index);
+    std::lock_guard lock(sl);
+    return hashesAndChildren_.getChildren()[index];
 }

 SHAMapHash const&
@@ -377,7 +458,9 @@ SHAMapInnerNode::canonicalizeChild(
     auto [_, hashes, children] = hashesAndChildren_.getHashesAndChildren();
     assert(node->getHash() == hashes[childIndex]);

-    std::lock_guard lock(childLock);
+    SpinBitlock sl(lock_, childIndex);
+    std::lock_guard lock(sl);
+
     if (children[childIndex])
     {
         // There is already a node hooked up, return it
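A note on why the call sites above change so little: SpinBitlock exposes
lock(), try_lock(), and unlock(), so it satisfies the standard Lockable
requirements and composes directly with std::lock_guard, just as the old
std::mutex did. A usage sketch, assuming the SpinBitlock class from the
diff above (touchOneBranch and touchAllBranches are hypothetical helpers):

    #include <atomic>
    #include <cstdint>
    #include <mutex>

    std::atomic<std::uint16_t> lock_{0};

    void
    touchOneBranch(int branch)
    {
        SpinBitlock sl(lock_, branch);  // guard only this child's bit
        std::lock_guard lock(sl);
        // ... safe to read or write the child at `branch` ...
    }

    void
    touchAllBranches()
    {
        SpinBitlock sl(lock_);  // mask 0xFFFF: take all 16 bits at once
        std::lock_guard lock(sl);
        // ... safe to touch every child, as clone() must ...
    }

The all-bits constructor is also why try_lock needs the compare_exchange
path: acquiring all sixteen locks must be all-or-nothing, and a fetch_or
of 0xFFFF would set the free bits even when some were already held.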

src/ripple/shamap/impl/SHAMapSync.cpp

@@ -583,7 +583,6 @@ SHAMap::addKnownNode(
     }

     auto const generation = f_.getFullBelowCache(ledgerSeq_)->getGeneration();
-    auto newNode = SHAMapTreeNode::makeFromWire(rawNode);

     SHAMapNodeID iNodeID;
     auto iNode = root_.get();
@@ -612,6 +611,8 @@ SHAMap::addKnownNode(
         if (iNode == nullptr)
         {
+            auto newNode = SHAMapTreeNode::makeFromWire(rawNode);
+
             if (!newNode || childHash != newNode->getHash())
             {
                 JLOG(journal_.warn()) << "Corrupt node received";
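The addKnownNode change is a small work deferral: SHAMapTreeNode::makeFromWire
is now called only on the path that actually needs a freshly deserialized
node, instead of unconditionally at the top of the function. A generic sketch
of the pattern, with hypothetical names rather than the rippled API:

    #include <memory>
    #include <vector>

    // Hypothetical stand-ins, not the rippled API:
    struct Node
    {
        explicit Node(std::vector<unsigned char> const& /*wire*/)
        {
            // imagine expensive deserialization here
        }
    };

    void
    addKnown(std::vector<unsigned char> const& wire, Node* existing)
    {
        if (existing != nullptr)
            return;  // common case: node already present, no parse needed

        // Pay the deserialization cost only on the path that needs it.
        auto node = std::make_unique<Node>(wire);
        // ... validate and link `node` ...
    }

    int main()
    {
        std::vector<unsigned char> wire{0x01, 0x02};
        addKnown(wire, nullptr);
    }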

src/ripple/shamap/impl/TaggedPointer.cpp

@@ -17,9 +17,9 @@
 */
 //==============================================================================

-#include <ripple/shamap/impl/TaggedPointer.h>
+#include <ripple/basics/ByteUtilities.h>
 #include <ripple/shamap/SHAMapInnerNode.h>
+#include <ripple/shamap/impl/TaggedPointer.h>

 #include <array>