mirror of
https://github.com/XRPLF/rippled.git
synced 2025-11-20 02:55:50 +00:00
Fix: Resolve slow test on macOS pipeline (#5392)
Using std::barrier performs extremely poorly (~1 hour vs ~1 minute to run the test suite) in certain macOS environments. To unblock our macOS CI pipeline, std::barrier has been replaced with a custom mutex-based barrier (Barrier) that significantly improves performance without compromising correctness.
This commit is contained in:
committed by
GitHub
parent
c3e9380fb4
commit
380ba9f1c1
14
.github/workflows/macos.yml
vendored
14
.github/workflows/macos.yml
vendored
@@ -71,6 +71,9 @@ jobs:
|
|||||||
nproc --version
|
nproc --version
|
||||||
echo -n "nproc returns: "
|
echo -n "nproc returns: "
|
||||||
nproc
|
nproc
|
||||||
|
system_profiler SPHardwareDataType
|
||||||
|
sysctl -n hw.logicalcpu
|
||||||
|
clang --version
|
||||||
- name: configure Conan
|
- name: configure Conan
|
||||||
run : |
|
run : |
|
||||||
conan profile new default --detect || true
|
conan profile new default --detect || true
|
||||||
@@ -89,9 +92,8 @@ jobs:
|
|||||||
generator: ${{ matrix.generator }}
|
generator: ${{ matrix.generator }}
|
||||||
configuration: ${{ matrix.configuration }}
|
configuration: ${{ matrix.configuration }}
|
||||||
cmake-args: "-Dassert=TRUE -Dwerr=TRUE ${{ matrix.cmake-args }}"
|
cmake-args: "-Dassert=TRUE -Dwerr=TRUE ${{ matrix.cmake-args }}"
|
||||||
# TODO: Temporary disabled tests
|
- name: test
|
||||||
# - name: test
|
run: |
|
||||||
# run: |
|
n=$(nproc)
|
||||||
# n=$(nproc)
|
echo "Using $n test jobs"
|
||||||
# echo "Using $n test jobs"
|
${build_dir}/rippled --unittest --unittest-jobs $n
|
||||||
# ${build_dir}/rippled --unittest --unittest-jobs $n
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <barrier>
|
#include <barrier>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <condition_variable>
|
||||||
#include <latch>
|
#include <latch>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <random>
|
#include <random>
|
||||||
@@ -19,6 +20,55 @@
|
|||||||
namespace ripple {
|
namespace ripple {
|
||||||
namespace tests {
|
namespace tests {
|
||||||
|
|
||||||
|
/**
|
||||||
|
Experimentally, we discovered that using std::barrier performs extremely
|
||||||
|
poorly (~1 hour vs ~1 minute to run the test suite) in certain macOS
|
||||||
|
environments. To unblock our macOS CI pipeline, we replaced std::barrier with a
|
||||||
|
custom mutex-based barrier (Barrier) that significantly improves performance
|
||||||
|
without compromising correctness. For future reference, if we ever consider
|
||||||
|
reintroducing std::barrier, the following configuration is known to exhibit the
|
||||||
|
problem:
|
||||||
|
|
||||||
|
Model Name: Mac mini
|
||||||
|
Model Identifier: Mac14,3
|
||||||
|
Model Number: Z16K000R4LL/A
|
||||||
|
Chip: Apple M2
|
||||||
|
Total Number of Cores: 8 (4 performance and 4 efficiency)
|
||||||
|
Memory: 24 GB
|
||||||
|
System Firmware Version: 11881.41.5
|
||||||
|
OS Loader Version: 11881.1.1
|
||||||
|
Apple clang version 16.0.0 (clang-1600.0.26.3)
|
||||||
|
Target: arm64-apple-darwin24.0.0
|
||||||
|
Thread model: posix
|
||||||
|
|
||||||
|
*/
|
||||||
|
struct Barrier
{
    // Guards every field below.
    std::mutex mtx;
    // Signalled once per phase, when the last participant arrives.
    std::condition_variable cv;
    // Participants still expected in the current phase.
    int count;
    // Number of participants per phase; `count` is reset to this value.
    int const initial;
    // Incremented every time a phase completes. Waiters compare against the
    // value they observed on arrival, so a thread that re-enters the barrier
    // for the next phase (and decrements `count` again) cannot hide the
    // completion of the previous phase from a waiter that has not yet woken.
    // Unsigned so that wrap-around is well defined; only inequality is used.
    unsigned generation = 0;

    // n is the number of threads that must call arrive_and_wait() before any
    // of them is released.
    Barrier(int n) : count(n), initial(n)
    {
    }

    // Blocks the caller until `initial` threads have arrived for the current
    // phase. The last arriver resets the barrier for the next phase and wakes
    // everyone else, so the same object can be reused phase after phase.
    void
    arrive_and_wait()
    {
        std::unique_lock lock(mtx);
        auto const arrivalGeneration = generation;
        if (--count == 0)
        {
            // Last participant: open the next phase before releasing waiters.
            count = initial;
            ++generation;
            cv.notify_all();
        }
        else
        {
            // Waiting on the phase number (not on `count`) is robust against
            // both spurious wake-ups and early re-entry by faster threads.
            cv.wait(lock, [&] { return generation != arrivalGeneration; });
        }
    }
};
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
enum class TrackedState : std::uint8_t {
|
enum class TrackedState : std::uint8_t {
|
||||||
uninitialized,
|
uninitialized,
|
||||||
@@ -500,9 +550,9 @@ public:
|
|||||||
constexpr int loopIters = 2 * 1024;
|
constexpr int loopIters = 2 * 1024;
|
||||||
constexpr int numThreads = 16;
|
constexpr int numThreads = 16;
|
||||||
std::vector<SharedIntrusive<TIBase>> toClone;
|
std::vector<SharedIntrusive<TIBase>> toClone;
|
||||||
std::barrier loopStartSyncPoint{numThreads};
|
Barrier loopStartSyncPoint{numThreads};
|
||||||
std::barrier postCreateToCloneSyncPoint{numThreads};
|
Barrier postCreateToCloneSyncPoint{numThreads};
|
||||||
std::barrier postCreateVecOfPointersSyncPoint{numThreads};
|
Barrier postCreateVecOfPointersSyncPoint{numThreads};
|
||||||
auto engines = [&]() -> std::vector<std::default_random_engine> {
|
auto engines = [&]() -> std::vector<std::default_random_engine> {
|
||||||
std::random_device rd;
|
std::random_device rd;
|
||||||
std::vector<std::default_random_engine> result;
|
std::vector<std::default_random_engine> result;
|
||||||
@@ -628,10 +678,10 @@ public:
|
|||||||
constexpr int flipPointersLoopIters = 256;
|
constexpr int flipPointersLoopIters = 256;
|
||||||
constexpr int numThreads = 16;
|
constexpr int numThreads = 16;
|
||||||
std::vector<SharedIntrusive<TIBase>> toClone;
|
std::vector<SharedIntrusive<TIBase>> toClone;
|
||||||
std::barrier loopStartSyncPoint{numThreads};
|
Barrier loopStartSyncPoint{numThreads};
|
||||||
std::barrier postCreateToCloneSyncPoint{numThreads};
|
Barrier postCreateToCloneSyncPoint{numThreads};
|
||||||
std::barrier postCreateVecOfPointersSyncPoint{numThreads};
|
Barrier postCreateVecOfPointersSyncPoint{numThreads};
|
||||||
std::barrier postFlipPointersLoopSyncPoint{numThreads};
|
Barrier postFlipPointersLoopSyncPoint{numThreads};
|
||||||
auto engines = [&]() -> std::vector<std::default_random_engine> {
|
auto engines = [&]() -> std::vector<std::default_random_engine> {
|
||||||
std::random_device rd;
|
std::random_device rd;
|
||||||
std::vector<std::default_random_engine> result;
|
std::vector<std::default_random_engine> result;
|
||||||
@@ -761,9 +811,9 @@ public:
|
|||||||
constexpr int lockWeakLoopIters = 256;
|
constexpr int lockWeakLoopIters = 256;
|
||||||
constexpr int numThreads = 16;
|
constexpr int numThreads = 16;
|
||||||
std::vector<SharedIntrusive<TIBase>> toLock;
|
std::vector<SharedIntrusive<TIBase>> toLock;
|
||||||
std::barrier loopStartSyncPoint{numThreads};
|
Barrier loopStartSyncPoint{numThreads};
|
||||||
std::barrier postCreateToLockSyncPoint{numThreads};
|
Barrier postCreateToLockSyncPoint{numThreads};
|
||||||
std::barrier postLockWeakLoopSyncPoint{numThreads};
|
Barrier postLockWeakLoopSyncPoint{numThreads};
|
||||||
|
|
||||||
// lockAndDestroy creates weak pointers from the strong pointer
|
// lockAndDestroy creates weak pointers from the strong pointer
|
||||||
// and runs a loop that locks the weak pointer. At the end of the loop
|
// and runs a loop that locks the weak pointer. At the end of the loop
|
||||||
|
|||||||
Reference in New Issue
Block a user