mirror of
https://github.com/XRPLF/clio.git
synced 2026-06-03 00:36:44 +00:00
131 lines
5.7 KiB
C++
131 lines
5.7 KiB
C++
#pragma once
|
|
|
|
#include "cluster/Backend.hpp"
|
|
#include "cluster/ClioNode.hpp"
|
|
#include "cluster/impl/FallbackRecoveryTimer.hpp"
|
|
#include "etl/WriterState.hpp"
|
|
|
|
#include <boost/asio/thread_pool.hpp>
|
|
|
|
#include <chrono>
|
|
#include <memory>
|
|
|
|
namespace cluster {
|
|
|
|
/**
|
|
* @brief Decides which node in the cluster should be the writer based on cluster state.
|
|
*
|
|
* This class monitors cluster state changes and determines whether the current node
|
|
* should act as the writer to the database.
|
|
*
|
|
* ## Election (normal operation)
|
|
*
|
|
* All non-ReadOnly nodes are sorted by UUID. The first node with @c etlStarted and
|
|
* @c cacheIsFull is elected writer. If no fully-ready node exists, the first node
|
|
* with @c etlStarted is chosen. All others give up writing.
|
|
*
|
|
* ## Fallback mode
|
|
*
|
|
* Fallback is the slower but more reliable mechanism based on database write-conflict
|
|
* detection (a node waits ~10 s of DB silence before writing). The cluster enters
|
|
* fallback whenever any non-ReadOnly node publishes @c DbRole::Fallback — for example
|
|
* during a rolling upgrade when an old node without cluster-coordination support is
|
|
* present.
|
|
*
|
|
* ## Fallback recovery
|
|
*
|
|
* To avoid the cluster staying in fallback indefinitely, a recovery timer is started
|
|
* when this node enters fallback. After the timer fires the node enters
|
|
* @c DbRole::FallbackRecovery and coordinates with peers to return to election mode.
|
|
* If any peer is already in @c FallbackRecovery, the node joins immediately (contagion
|
|
* rule), cancelling its own pending timer.
|
|
*
|
|
* ## State machine for `onNewState`
|
|
*
|
|
* @code
*
*                      sees any Fallback node
*  [election mode] ──────────────────────────────► [Fallback]
*   (NotWriter /                                       │
*    Writer)                                  recovery timer fires
*       ▲                                           (1 hour)
*       │                                   OR sees FallbackRecovery
*       │                                    node (contagion rule)
*       │                                              │
*       │                                              ▼
*       │     no Fallback nodes visible        [FallbackRecovery]
*       └─────────────────────────────────────────────────
*
* @endcode
|
|
*
|
|
* Nodes in FallbackRecovery continue the fallback write-race so there is no write
|
|
* availability gap during the coordination phase.
|
|
*/
|
|
class WriterDecider {
|
|
public:
|
|
static constexpr std::chrono::steady_clock::duration kRECOVERY_TIME = std::chrono::hours{1};
|
|
|
|
private:
|
|
/** @brief Thread pool for spawning asynchronous tasks */
|
|
boost::asio::thread_pool& ctx_;
|
|
|
|
/** @brief Interface for controlling the writer state of this node */
|
|
std::unique_ptr<etl::WriterStateInterface> writerState_;
|
|
|
|
/**
|
|
* @brief Timer that fires after a delay to initiate fallback recovery.
|
|
*
|
|
* Started when this node enters @c DbRole::Fallback (either via election-mode
|
|
* transition or via an externally triggered fallback). Cancelled when the node
|
|
* transitions to @c DbRole::FallbackRecovery (timer fired or contagion rule).
|
|
* Copied into spawned task closures by value — all copies share the same
|
|
* underlying mutex-protected state.
|
|
*/
|
|
impl::FallbackRecoveryTimer fallbackRecoveryTimer_;
|
|
|
|
public:
|
|
/**
|
|
* @brief Constructs a WriterDecider.
|
|
*
|
|
* @param ctx Thread pool for executing asynchronous operations
|
|
* @param writerState Writer state interface for controlling write operations
|
|
* @param recoveryTime How long to wait in Fallback before attempting recovery
|
|
* (defaults to `kRECOVERY_TIME`; pass a short duration in tests)
|
|
*/
|
|
WriterDecider(
|
|
boost::asio::thread_pool& ctx,
|
|
std::unique_ptr<etl::WriterStateInterface> writerState,
|
|
std::chrono::steady_clock::duration recoveryTime = kRECOVERY_TIME
|
|
);
|
|
|
|
/**
|
|
* @brief Handles cluster state changes and decides whether this node should be the writer.
|
|
*
|
|
* Spawns an asynchronous task that applies the state machine described in the class
|
|
* documentation. Decisions are based on the @p clusterData snapshot:
|
|
*
|
|
* - If @p clusterData has no value (communication failure), no action is taken.
|
|
* - If self is @c ReadOnly, writing is given up unconditionally.
|
|
* - If self is @c Fallback and a @c FallbackRecovery node is visible, the contagion
|
|
* rule applies: this node also enters @c FallbackRecovery and the recovery timer
|
|
* is cancelled.
|
|
* - If self is @c Fallback and the recovery timer is not running, it is started
|
|
* (handles the case where fallback was triggered externally, e.g. by Monitor).
|
|
* - If self is @c FallbackRecovery and no @c Fallback nodes are visible, the
|
|
* recovery coordination is complete: writing is given up and the fallback recovery
|
|
* flag is cleared so the node enters election mode on the next cycle.
|
|
* - If self is in election mode and any @c Fallback node is visible, this node
|
|
* switches to @c Fallback and the recovery timer is started.
|
|
* - Otherwise, election proceeds: nodes are sorted by UUID and the first fully-ready
|
|
* (@c etlStarted && @c cacheIsFull) non-ReadOnly node is elected writer.
|
|
*
|
|
* @param selfId The UUID of the current node
|
|
* @param clusterData Shared pointer to current cluster data; may be empty if
|
|
* communication failed
|
|
*/
|
|
void
|
|
onNewState(ClioNode::CUuid selfId, std::shared_ptr<Backend::ClusterData const> clusterData);
|
|
};
|
|
|
|
} // namespace cluster
|