diff --git a/src/ripple/basics/LocalValue.h b/src/ripple/basics/LocalValue.h index a12432ae3..dc4a70ef3 100644 --- a/src/ripple/basics/LocalValue.h +++ b/src/ripple/basics/LocalValue.h @@ -36,6 +36,10 @@ struct LocalValues bool onCoro = true; void* coroPtr = nullptr; // Pointer to owning JobQueue::Coro (if any) + // When true, SHAMap::finishFetch() will poll-wait for missing nodes + // instead of returning empty. Only set by partial sync code paths. + bool partialSyncWait = false; + // Configurable timeout for SHAMap node fetching during partial sync. // Zero means use the default (30s). RPC handlers can set this to // customize poll-wait behavior. @@ -145,6 +149,25 @@ getCurrentCoroPtr() return nullptr; } +// Check if partial sync wait is enabled for the current coroutine context. +inline bool +isPartialSyncWaitEnabled() +{ + auto lvs = detail::getLocalValues().get(); + if (lvs && lvs->onCoro) + return lvs->partialSyncWait; + return false; +} + +// Enable/disable partial sync wait for the current coroutine context. +inline void +setPartialSyncWait(bool enabled) +{ + auto lvs = detail::getLocalValues().get(); + if (lvs && lvs->onCoro) + lvs->partialSyncWait = enabled; +} + // Get the configured fetch timeout for current coroutine context. // Returns 0ms if not in a coroutine or no custom timeout set. inline std::chrono::milliseconds diff --git a/src/ripple/rpc/handlers/SubmitAndWait.cpp b/src/ripple/rpc/handlers/SubmitAndWait.cpp index 77da9a6a1..ee60a3127 100644 --- a/src/ripple/rpc/handlers/SubmitAndWait.cpp +++ b/src/ripple/rpc/handlers/SubmitAndWait.cpp @@ -119,7 +119,8 @@ doSubmitAndWait(RPC::JsonContext& context) timeout = std::chrono::seconds(t); } - // Set coroutine-local fetch timeout for SHAMap operations + // Enable partial sync wait for SHAMap operations + setPartialSyncWait(true); setCoroFetchTimeout( std::chrono::duration_cast(timeout / 2)); diff --git a/src/ripple/rpc/impl/RPCHelpers.cpp b/src/ripple/rpc/impl/RPCHelpers.cpp index fe3ed18ab..3e7d5df18 100644 --- a/src/ripple/rpc/impl/RPCHelpers.cpp +++ b/src/ripple/rpc/impl/RPCHelpers.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -695,6 +696,7 @@ getLedger(T& ledger, LedgerShortcut shortcut, Context& context) if (hash.isNonZero()) { + setPartialSyncWait(true); ledger = context.app.getInboundLedgers().getPartialLedger(hash); // If no InboundLedger exists yet, trigger acquisition and wait if (!ledger) diff --git a/src/ripple/shamap/impl/SHAMap.cpp b/src/ripple/shamap/impl/SHAMap.cpp index 2ad7c37da..aaaae1c13 100644 --- a/src/ripple/shamap/impl/SHAMap.cpp +++ b/src/ripple/shamap/impl/SHAMap.cpp @@ -189,64 +189,69 @@ SHAMap::finishFetch( f_.missingNodeAcquireBySeq(ledgerSeq_, hash.as_uint256()); } - // If we're in a coroutine context, poll-wait for the node - if (auto* coro = static_cast(getCurrentCoroPtr())) - { - using namespace std::chrono; - constexpr auto pollInterval = 50ms; - constexpr auto defaultTimeout = 30s; - // Use coroutine-local timeout if set, otherwise default - auto coroTimeout = getCoroFetchTimeout(); - auto timeout = - coroTimeout.count() > 0 ? coroTimeout : defaultTimeout; - auto const deadline = steady_clock::now() + timeout; - - // Linear backoff for re-requests: 50ms, 100ms, 150ms... up to - // 2s - auto nextRequestDelay = 50ms; - constexpr auto maxRequestDelay = 2000ms; - constexpr auto backoffStep = 50ms; - auto nextRequestTime = steady_clock::now() + nextRequestDelay; - - JLOG(journal_.debug()) - << "finishFetch: waiting for node " << hash; - - while (steady_clock::now() < deadline) + // If partial sync wait is enabled, poll-wait for the node + if (isPartialSyncWaitEnabled()) + if (auto* coro = + static_cast(getCurrentCoroPtr())) { - // Sleep for the poll interval (yields coroutine, frees job - // thread) - coro->sleepFor(pollInterval); + using namespace std::chrono; + constexpr auto pollInterval = 50ms; + constexpr auto defaultTimeout = 30s; + // Use coroutine-local timeout if set, otherwise default + auto coroTimeout = getCoroFetchTimeout(); + auto timeout = + coroTimeout.count() > 0 ? coroTimeout : defaultTimeout; + auto const deadline = steady_clock::now() + timeout; - // Try to fetch from cache/db again - if (auto obj = f_.db().fetchNodeObject( - hash.as_uint256(), ledgerSeq_)) + // Linear backoff for re-requests: 50ms, 100ms, 150ms... up + // to 2s + auto nextRequestDelay = 50ms; + constexpr auto maxRequestDelay = 2000ms; + constexpr auto backoffStep = 50ms; + auto nextRequestTime = + steady_clock::now() + nextRequestDelay; + + JLOG(journal_.debug()) + << "finishFetch: waiting for node " << hash; + + while (steady_clock::now() < deadline) { - JLOG(journal_.debug()) - << "finishFetch: got node " << hash; - auto node = SHAMapTreeNode::makeFromPrefix( - makeSlice(obj->getData()), hash); - if (node) - canonicalize(hash, node); - return node; + // Sleep for the poll interval (yields coroutine, frees + // job thread) + coro->sleepFor(pollInterval); + + // Try to fetch from cache/db again + if (auto obj = f_.db().fetchNodeObject( + hash.as_uint256(), ledgerSeq_)) + { + JLOG(journal_.debug()) + << "finishFetch: got node " << hash; + auto node = SHAMapTreeNode::makeFromPrefix( + makeSlice(obj->getData()), hash); + if (node) + canonicalize(hash, node); + return node; + } + + // Re-request with priority using linear backoff + auto now = steady_clock::now(); + if (now >= nextRequestTime) + { + f_.missingNodeAcquireBySeq( + ledgerSeq_, + hash.as_uint256(), + true /*prioritize*/); + // Increase delay for next request (linear backoff) + if (nextRequestDelay < maxRequestDelay) + nextRequestDelay += backoffStep; + nextRequestTime = now + nextRequestDelay; + } } - // Re-request with priority using linear backoff - auto now = steady_clock::now(); - if (now >= nextRequestTime) - { - f_.missingNodeAcquireBySeq( - ledgerSeq_, hash.as_uint256(), true /*prioritize*/); - // Increase delay for next request (linear backoff) - if (nextRequestDelay < maxRequestDelay) - nextRequestDelay += backoffStep; - nextRequestTime = now + nextRequestDelay; - } + JLOG(journal_.warn()) + << "finishFetch: timeout waiting for node " << hash; } - JLOG(journal_.warn()) - << "finishFetch: timeout waiting for node " << hash; - } - return {}; }