/*
    This file is part of rippled: https://github.com/ripple/rippled
    Copyright 2022, Nikolaos D. Bougalis <nikb@bougalis.net>

    Permission to use, copy, modify, and/or distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
    copyright notice and this permission notice appear in all copies.

    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#ifndef RIPPLE_BASICS_SPINLOCK_H_INCLUDED
#define RIPPLE_BASICS_SPINLOCK_H_INCLUDED

#include <xrpl/beast/utility/instrumentation.h>
#include <atomic>
#include <limits>
#include <type_traits>

#ifndef __aarch64__
#include <immintrin.h>
#endif

namespace ripple {

namespace detail {
/** Inform the processor that we are in a tight spin-wait loop.

    Spinlocks caught in tight loops can result in the processor's pipeline
    filling up with comparison operations, resulting in a misprediction at
    the time the lock is finally acquired, necessitating pipeline flushing
    which is ridiculously expensive and results in very high latency.

    This function instructs the processor to "pause" for some
    architecture-specific amount of time, to prevent this.
*/
inline void
spin_pause() noexcept
{
#ifdef __aarch64__
    asm volatile("yield");
#else
    _mm_pause();
#endif
}

} // namespace detail

/** @{ */
/** Classes to handle arrays of spinlocks packed into a single atomic integer:

    Packed spinlocks allow for tremendously space-efficient lock-sharding,
    but they come at a cost.

    First, the implementation is necessarily low-level and uses advanced
    features like memory ordering and highly platform-specific tricks to
    maximize performance. This imposes a significant and ongoing cost to
    developers.

    Second, and perhaps most important, the packing of multiple locks into
    a single integer, albeit space-efficient, also has performance
    implications stemming from data dependencies, increased cache-coherency
    traffic between processors, and heavier loads on the processor's
    load/store units.

    To be sure, these locks can have advantages, but they are definitely
    not general-purpose locks and should not be thought of or used that
    way. The use cases for them are likely few and far between; without
    a compelling reason to use them, backed by profiling data, it might
    be best to use one of the standard locking primitives instead. Note
    that on most common platforms, `std::mutex` is so heavily optimized
    that it can, usually, outperform spinlocks.

    @tparam T An unsigned integral type (e.g. std::uint16_t)
*/
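
// Illustrative sketch of the packing described above (an assumption for
// exposition, not part of this header's interface): a single
// std::atomic<std::uint16_t> shards sixteen independent locks, with lock
// `i` occupying bit (1 << i). The names `lockWord`, `Entry`, and `table`
// are hypothetical.
//
//     std::atomic<std::uint16_t> lockWord{0};  // 16 locks in 2 bytes
//     std::array<Entry, 16> table;             // entry i guarded by bit i
//
//     // Entry i is protected by packed_spinlock<std::uint16_t>(lockWord, i)
//     // rather than by 16 separate mutexes, which on common platforms
//     // would each occupy tens of bytes.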

/** A class that grabs a single packed spinlock from an atomic integer.

    This class meets the requirements of Lockable:
        https://en.cppreference.com/w/cpp/named_req/Lockable
*/
template <class T>
class packed_spinlock
{
    // clang-format off
    static_assert(std::is_unsigned_v<T>);
    static_assert(std::atomic<T>::is_always_lock_free);
    static_assert(
        std::is_same_v<decltype(std::declval<std::atomic<T>&>().fetch_or(0)), T> &&
        std::is_same_v<decltype(std::declval<std::atomic<T>&>().fetch_and(0)), T>,
        "std::atomic<T>::fetch_or(T) and std::atomic<T>::fetch_and(T) are required by packed_spinlock");
    // clang-format on

private:
    std::atomic<T>& bits_;
    T const mask_;

public:
    packed_spinlock(packed_spinlock const&) = delete;
    packed_spinlock&
    operator=(packed_spinlock const&) = delete;

    /** A single spinlock packed inside the specified atomic

        @param lock The atomic integer inside which the spinlock is packed.
        @param index The index of the spinlock this object acquires.

        @note For performance reasons, you should strive to have `lock` be
              on a cacheline by itself.
    */
    packed_spinlock(std::atomic<T>& lock, int index)
        : bits_(lock), mask_(static_cast<T>(1) << index)
    {
        ASSERT(
            index >= 0 && (mask_ != 0),
            "ripple::packed_spinlock::packed_spinlock : valid index and mask");
    }

    [[nodiscard]] bool
    try_lock()
    {
        return (bits_.fetch_or(mask_, std::memory_order_acquire) & mask_) == 0;
    }

    void
    lock()
    {
        while (!try_lock())
        {
            // The use of relaxed memory ordering here is intentional and
            // serves to help reduce cache coherency traffic during times
            // of contention by avoiding writes that would definitely not
            // result in the lock being acquired.
            while ((bits_.load(std::memory_order_relaxed) & mask_) != 0)
                detail::spin_pause();
        }
    }

    void
    unlock()
    {
        bits_.fetch_and(~mask_, std::memory_order_release);
    }
};
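
// Illustrative usage sketch (an assumption, not part of this header):
// because packed_spinlock models Lockable, it composes with std::lock_guard.
// The names `lockWord`, `Slot`, `slots`, and `touch` are hypothetical; a
// real caller would also include <mutex> and <array>.
//
//     std::atomic<std::uint64_t> lockWord{0};  // up to 64 packed locks
//     std::array<Slot, 64> slots;
//
//     void
//     touch(int i)
//     {
//         packed_spinlock<std::uint64_t> sl(lockWord, i);
//         std::lock_guard guard(sl);  // sets bit (1 << i); cleared on scope exit
//         // ... mutate slots[i] ...
//     }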

/** A spinlock implemented on top of an atomic integer.

    @note Using `packed_spinlock` and `spinlock` against the same underlying
          atomic integer can result in `spinlock` not being able to actually
          acquire the lock during periods of high contention, because of how
          the two locks operate: `spinlock` will spin trying to grab all the
          bits at once, whereas any given `packed_spinlock` will only try to
          grab one bit at a time. Caveat emptor.

    This class meets the requirements of Lockable:
        https://en.cppreference.com/w/cpp/named_req/Lockable
*/
template <class T>
class spinlock
{
    static_assert(std::is_unsigned_v<T>);
    static_assert(std::atomic<T>::is_always_lock_free);

private:
    std::atomic<T>& lock_;

public:
    spinlock(spinlock const&) = delete;
    spinlock&
    operator=(spinlock const&) = delete;

    /** A spinlock over the specified atomic integer.

        @param lock The atomic integer to spin against.

        @note For performance reasons, you should strive to have `lock` be
              on a cacheline by itself.
    */
    spinlock(std::atomic<T>& lock) : lock_(lock)
    {
    }

    [[nodiscard]] bool
    try_lock()
    {
        T expected = 0;

        return lock_.compare_exchange_weak(
            expected,
            std::numeric_limits<T>::max(),
            std::memory_order_acquire,
            std::memory_order_relaxed);
    }

    void
    lock()
    {
        while (!try_lock())
        {
            // The use of relaxed memory ordering here is intentional and
            // serves to help reduce cache coherency traffic during times
            // of contention by avoiding writes that would definitely not
            // result in the lock being acquired.
            while (lock_.load(std::memory_order_relaxed) != 0)
                detail::spin_pause();
        }
    }

    void
    unlock()
    {
        lock_.store(0, std::memory_order_release);
    }
};
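
// Illustrative usage sketch (an assumption, not part of this header): a
// spinlock guarding a rarely-contended counter, again via std::lock_guard.
// The names `flag`, `counter`, and `bump` are hypothetical.
//
//     std::atomic<std::uint8_t> flag{0};
//     std::uint64_t counter = 0;
//
//     void
//     bump()
//     {
//         spinlock<std::uint8_t> lock(flag);
//         std::lock_guard guard(lock);  // CAS 0 -> 0xFF; store(0) on release
//         ++counter;
//     }
//
// Per the @note above, sharing `flag` with packed_spinlock instances is
// possible but can starve the whole-word spinlock under contention.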
/** @} */

} // namespace ripple

#endif