From 0c76bf991ab8e75379083eded4661c5bcddb123b Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Fri, 27 Mar 2026 18:52:18 +0000 Subject: [PATCH] fix: Guard Coro::resume() against completed coroutines (#6608) Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 --- include/xrpl/core/Coro.ipp | 16 +++++++++++++--- include/xrpl/core/JobQueue.h | 10 ++++++---- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/xrpl/core/Coro.ipp b/include/xrpl/core/Coro.ipp index e7c08e65a6..dca9504679 100644 --- a/include/xrpl/core/Coro.ipp +++ b/include/xrpl/core/Coro.ipp @@ -70,14 +70,24 @@ JobQueue::Coro::resume() running_ = true; } { - std::lock_guard lock(jq_.m_mutex); + std::lock_guard lk(jq_.m_mutex); --jq_.nSuspend_; } auto saved = detail::getLocalValues().release(); detail::getLocalValues().reset(&lvs_); std::lock_guard lock(mutex_); - XRPL_ASSERT(static_cast<bool>(coro_), "xrpl::JobQueue::Coro::resume : is runnable"); - coro_(); + // A late resume() can arrive after the coroutine has already completed. + // This is an expected (if rare) outcome of the race condition documented + // in JobQueue.h:354-377 where post() schedules a resume job before the + // coroutine yields — the mutex serializes access, but by the time this + // resume() acquires the lock the coroutine may have already run to + // completion. Calling operator() on a completed boost::coroutine2 is + // undefined behavior, so we must check and skip invoking the coroutine + // body if it has already completed. 
+ if (coro_) + { + coro_(); + } detail::getLocalValues().release(); detail::getLocalValues().reset(saved); std::lock_guard lk(mutex_run_); diff --git a/include/xrpl/core/JobQueue.h b/include/xrpl/core/JobQueue.h index 583e8bc26a..fdb708ee57 100644 --- a/include/xrpl/core/JobQueue.h +++ b/include/xrpl/core/JobQueue.h @@ -99,8 +99,8 @@ public: Effects: The coroutine continues execution from where it last left off using this same thread. - Undefined behavior if called after the coroutine has completed - with a return (as opposed to a yield()). + If the coroutine has already completed, returns immediately + (handles the documented post-before-yield race condition). Undefined behavior if resume() or post() called consecutively without a corresponding yield. */ @@ -357,8 +357,10 @@ private: If the post() job were to be executed before yield(), undefined behavior would occur. The lock ensures that coro_ is not called again until we exit the coroutine. At which point a scheduled resume() job waiting on the lock - would gain entry, harmlessly call coro_ and immediately return as we have - already completed the coroutine. + would gain entry. resume() checks if the coroutine has already completed + (coro_ converts to false) and, if so, skips invoking operator() since + calling operator() on a completed boost::coroutine2 pull_type is undefined + behavior. The race condition occurs as follows: