mirror of
https://github.com/Xahau/xahaud.git
synced 2025-12-06 17:27:52 +00:00
Eliminate SHAMapInnerNode lock contention:
The `SHAMapInnerNode` class had a global mutex to protect the array of node children. Profiling suggested that around 4% of all attempts to lock the global would block. This commit removes that global mutex, and replaces it with a new per-node 16-way spinlock (implemented so as not to effect the size of an inner node objet), effectively eliminating the lock contention.
This commit is contained in:
@@ -27,8 +27,10 @@
|
|||||||
#include <ripple/shamap/SHAMapTreeNode.h>
|
#include <ripple/shamap/SHAMapTreeNode.h>
|
||||||
#include <ripple/shamap/impl/TaggedPointer.h>
|
#include <ripple/shamap/impl/TaggedPointer.h>
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <limits>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
@@ -53,7 +55,8 @@ private:
|
|||||||
std::uint32_t fullBelowGen_ = 0;
|
std::uint32_t fullBelowGen_ = 0;
|
||||||
std::uint16_t isBranch_ = 0;
|
std::uint16_t isBranch_ = 0;
|
||||||
|
|
||||||
static std::mutex childLock;
|
/** A bitlock for the children of this node, with one bit per child */
|
||||||
|
mutable std::atomic<std::uint16_t> lock_ = 0;
|
||||||
|
|
||||||
/** Convert arrays stored in `hashesAndChildren_` so they can store the
|
/** Convert arrays stored in `hashesAndChildren_` so they can store the
|
||||||
requested number of children.
|
requested number of children.
|
||||||
@@ -155,7 +158,7 @@ public:
|
|||||||
std::shared_ptr<SHAMapTreeNode>
|
std::shared_ptr<SHAMapTreeNode>
|
||||||
getChild(int branch);
|
getChild(int branch);
|
||||||
|
|
||||||
virtual std::shared_ptr<SHAMapTreeNode>
|
std::shared_ptr<SHAMapTreeNode>
|
||||||
canonicalizeChild(int branch, std::shared_ptr<SHAMapTreeNode> node);
|
canonicalizeChild(int branch, std::shared_ptr<SHAMapTreeNode> node);
|
||||||
|
|
||||||
// sync functions
|
// sync functions
|
||||||
|
|||||||
@@ -19,11 +19,9 @@
|
|||||||
|
|
||||||
#include <ripple/shamap/SHAMapInnerNode.h>
|
#include <ripple/shamap/SHAMapInnerNode.h>
|
||||||
|
|
||||||
#include <ripple/basics/ByteUtilities.h>
|
|
||||||
#include <ripple/basics/Log.h>
|
#include <ripple/basics/Log.h>
|
||||||
#include <ripple/basics/Slice.h>
|
#include <ripple/basics/Slice.h>
|
||||||
#include <ripple/basics/contract.h>
|
#include <ripple/basics/contract.h>
|
||||||
#include <ripple/basics/safe_cast.h>
|
|
||||||
#include <ripple/beast/core/LexicalCast.h>
|
#include <ripple/beast/core/LexicalCast.h>
|
||||||
#include <ripple/protocol/HashPrefix.h>
|
#include <ripple/protocol/HashPrefix.h>
|
||||||
#include <ripple/protocol/digest.h>
|
#include <ripple/protocol/digest.h>
|
||||||
@@ -33,14 +31,88 @@
|
|||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <array>
|
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <mutex>
|
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
|
// This is used for the _mm_pause instruction:
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
namespace ripple {
|
namespace ripple {
|
||||||
|
|
||||||
std::mutex SHAMapInnerNode::childLock;
|
/** A specialized 16-way spinlock used to protect inner node branches.
|
||||||
|
|
||||||
|
This class packs 16 separate spinlocks into a single 16-bit value. It makes
|
||||||
|
it possible to lock any one lock at once or, alternatively, all together.
|
||||||
|
|
||||||
|
The implementation tries to use portable constructs but has to be low-level
|
||||||
|
for performance.
|
||||||
|
*/
|
||||||
|
class SpinBitlock
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
std::atomic<std::uint16_t>& bits_;
|
||||||
|
std::uint16_t mask_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
SpinBitlock(std::atomic<std::uint16_t>& lock) : bits_(lock), mask_(0xFFFF)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
SpinBitlock(std::atomic<std::uint16_t>& lock, int index)
|
||||||
|
: bits_(lock), mask_(1 << index)
|
||||||
|
{
|
||||||
|
assert(index >= 0 && index < 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool
|
||||||
|
try_lock()
|
||||||
|
{
|
||||||
|
// If we want to grab all the individual bitlocks at once we cannot
|
||||||
|
// use `fetch_or`! To see why, imagine that `lock_ == 0x0020` which
|
||||||
|
// means that the `fetch_or` would return `0x0020` but all the bits
|
||||||
|
// would already be (incorrectly!) set. Oops!
|
||||||
|
std::uint16_t expected = 0;
|
||||||
|
|
||||||
|
if (mask_ != 0xFFFF)
|
||||||
|
return (bits_.fetch_or(mask_, std::memory_order_acquire) & mask_) ==
|
||||||
|
expected;
|
||||||
|
|
||||||
|
return bits_.compare_exchange_weak(
|
||||||
|
expected,
|
||||||
|
mask_,
|
||||||
|
std::memory_order_acquire,
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
lock()
|
||||||
|
{
|
||||||
|
// Testing suggests that 99.9999% of the time this will succeed, so
|
||||||
|
// we try to optimize the fast path.
|
||||||
|
if (try_lock())
|
||||||
|
return;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
// We try to spin for a few times:
|
||||||
|
for (int i = 0; i != 100; ++i)
|
||||||
|
{
|
||||||
|
if (try_lock())
|
||||||
|
return;
|
||||||
|
|
||||||
|
_mm_pause();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::this_thread::yield();
|
||||||
|
} while ((bits_.load(std::memory_order_relaxed) & mask_) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
unlock()
|
||||||
|
{
|
||||||
|
bits_.fetch_and(~mask_, std::memory_order_release);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
SHAMapInnerNode::SHAMapInnerNode(
|
SHAMapInnerNode::SHAMapInnerNode(
|
||||||
std::uint32_t cowid,
|
std::uint32_t cowid,
|
||||||
@@ -108,7 +180,10 @@ SHAMapInnerNode::clone(std::uint32_t cowid) const
|
|||||||
cloneHashes[branchNum] = thisHashes[indexNum];
|
cloneHashes[branchNum] = thisHashes[indexNum];
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
std::lock_guard lock(childLock);
|
|
||||||
|
SpinBitlock sl(lock_);
|
||||||
|
std::lock_guard lock(sl);
|
||||||
|
|
||||||
if (thisIsSparse)
|
if (thisIsSparse)
|
||||||
{
|
{
|
||||||
int cloneChildIndex = 0;
|
int cloneChildIndex = 0;
|
||||||
@@ -341,8 +416,11 @@ SHAMapInnerNode::getChildPointer(int branch)
|
|||||||
assert(branch >= 0 && branch < branchFactor);
|
assert(branch >= 0 && branch < branchFactor);
|
||||||
assert(!isEmptyBranch(branch));
|
assert(!isEmptyBranch(branch));
|
||||||
|
|
||||||
std::lock_guard lock(childLock);
|
auto const index = *getChildIndex(branch);
|
||||||
return hashesAndChildren_.getChildren()[*getChildIndex(branch)].get();
|
|
||||||
|
SpinBitlock sl(lock_, index);
|
||||||
|
std::lock_guard lock(sl);
|
||||||
|
return hashesAndChildren_.getChildren()[index].get();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<SHAMapTreeNode>
|
std::shared_ptr<SHAMapTreeNode>
|
||||||
@@ -351,8 +429,11 @@ SHAMapInnerNode::getChild(int branch)
|
|||||||
assert(branch >= 0 && branch < branchFactor);
|
assert(branch >= 0 && branch < branchFactor);
|
||||||
assert(!isEmptyBranch(branch));
|
assert(!isEmptyBranch(branch));
|
||||||
|
|
||||||
std::lock_guard lock(childLock);
|
auto const index = *getChildIndex(branch);
|
||||||
return hashesAndChildren_.getChildren()[*getChildIndex(branch)];
|
|
||||||
|
SpinBitlock sl(lock_, index);
|
||||||
|
std::lock_guard lock(sl);
|
||||||
|
return hashesAndChildren_.getChildren()[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
SHAMapHash const&
|
SHAMapHash const&
|
||||||
@@ -377,7 +458,9 @@ SHAMapInnerNode::canonicalizeChild(
|
|||||||
auto [_, hashes, children] = hashesAndChildren_.getHashesAndChildren();
|
auto [_, hashes, children] = hashesAndChildren_.getHashesAndChildren();
|
||||||
assert(node->getHash() == hashes[childIndex]);
|
assert(node->getHash() == hashes[childIndex]);
|
||||||
|
|
||||||
std::lock_guard lock(childLock);
|
SpinBitlock sl(lock_, childIndex);
|
||||||
|
std::lock_guard lock(sl);
|
||||||
|
|
||||||
if (children[childIndex])
|
if (children[childIndex])
|
||||||
{
|
{
|
||||||
// There is already a node hooked up, return it
|
// There is already a node hooked up, return it
|
||||||
|
|||||||
@@ -583,7 +583,6 @@ SHAMap::addKnownNode(
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto const generation = f_.getFullBelowCache(ledgerSeq_)->getGeneration();
|
auto const generation = f_.getFullBelowCache(ledgerSeq_)->getGeneration();
|
||||||
auto newNode = SHAMapTreeNode::makeFromWire(rawNode);
|
|
||||||
SHAMapNodeID iNodeID;
|
SHAMapNodeID iNodeID;
|
||||||
auto iNode = root_.get();
|
auto iNode = root_.get();
|
||||||
|
|
||||||
@@ -612,6 +611,8 @@ SHAMap::addKnownNode(
|
|||||||
|
|
||||||
if (iNode == nullptr)
|
if (iNode == nullptr)
|
||||||
{
|
{
|
||||||
|
auto newNode = SHAMapTreeNode::makeFromWire(rawNode);
|
||||||
|
|
||||||
if (!newNode || childHash != newNode->getHash())
|
if (!newNode || childHash != newNode->getHash())
|
||||||
{
|
{
|
||||||
JLOG(journal_.warn()) << "Corrupt node received";
|
JLOG(journal_.warn()) << "Corrupt node received";
|
||||||
|
|||||||
@@ -17,9 +17,9 @@
|
|||||||
*/
|
*/
|
||||||
//==============================================================================
|
//==============================================================================
|
||||||
|
|
||||||
#include <ripple/shamap/impl/TaggedPointer.h>
|
#include <ripple/basics/ByteUtilities.h>
|
||||||
|
|
||||||
#include <ripple/shamap/SHAMapInnerNode.h>
|
#include <ripple/shamap/SHAMapInnerNode.h>
|
||||||
|
#include <ripple/shamap/impl/TaggedPointer.h>
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user