Compare commits

...

2 Commits

Author SHA1 Message Date
Pratik Mankawde
385fc767f4 fix: Restore explicit lk.unlock() in Workers::stop()
Keep the explicit unlock before returning from stop() to release
the mutex as soon as it is no longer needed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 12:20:49 +00:00
Pratik Mankawde
37b7dcbf99 fix: Fix Workers::stop() race between m_allPaused and m_runningTaskCount
Replace the post-wait assertion in Workers::stop() with a compound
condition variable predicate that waits for both m_allPaused AND
m_runningTaskCount == 0. The old code asserted zero running tasks
after confirming m_allPaused, but these two states use independent
synchronization (mutex vs atomic), allowing a window where m_allPaused
is true while a task is still finishing.

Additionally, notify m_cv when m_runningTaskCount drops to zero,
locking m_mut first to prevent lost wakeups against the predicate
evaluation in stop()'s cv.wait().

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-18 11:56:58 +00:00

View File

@@ -1,5 +1,4 @@
#include <xrpl/beast/core/CurrentThreadName.h>
#include <xrpl/beast/utility/instrumentation.h>
#include <xrpl/core/PerfLog.h>
#include <xrpl/core/detail/Workers.h>
@@ -96,11 +95,13 @@ Workers::stop()
{
setNumberOfThreads(0);
// Wait until all workers have paused AND no tasks are actively running.
// Both conditions are needed because m_allPaused (mutex-protected) and
// m_runningTaskCount (atomic) are not synchronized under the same lock,
// so m_allPaused can momentarily be true while a task is still finishing.
std::unique_lock<std::mutex> lk{m_mut};
m_cv.wait(lk, [this] { return m_allPaused; });
m_cv.wait(lk, [this] { return m_allPaused && numberOfCurrentlyRunningTasks() == 0; });
lk.unlock();
XRPL_ASSERT(numberOfCurrentlyRunningTasks() == 0, "xrpl::Workers::stop : zero running tasks");
}
void
@@ -217,7 +218,18 @@ Workers::Worker::run()
//
++m_workers.m_runningTaskCount;
m_workers.m_callback.processTask(instance_);
--m_workers.m_runningTaskCount;
// When the running task count drops to zero, wake stop() which
// may be waiting for both m_allPaused and zero running tasks.
// Locking m_mut before notify_all() prevents a lost wakeup:
// it serializes against the predicate check inside stop()'s
// cv.wait(), ensuring the notification is not missed between
// the predicate evaluation and the actual sleep.
if (--m_workers.m_runningTaskCount == 0)
{
std::lock_guard lk{m_workers.m_mut};
m_workers.m_cv.notify_all();
}
}
// Any worker that goes into the paused list must