mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-03 00:36:48 +00:00
348 lines
13 KiB
C++
348 lines
13 KiB
C++
#pragma once
|
||
|
||
#include <xrpl/basics/base_uint.h>
|
||
#include <xrpl/basics/hardened_hash.h>
|
||
#include <xrpl/protocol/Keylet.h>
|
||
#include <xrpl/protocol/STLedgerEntry.h>
|
||
|
||
#include <cstddef>
|
||
#include <iterator>
|
||
#include <memory>
|
||
#include <shared_mutex>
|
||
#include <unordered_map>
|
||
#include <unordered_set>
|
||
#include <vector>
|
||
|
||
namespace xrpl {
|
||
|
||
/** Flat keylet-indexed materialization of XRPL state.
|
||
|
||
The SHAMap is XRPL's authoritative state structure — it produces
|
||
the state root that consensus agrees on. But on its own, it forces
|
||
every state read to walk the trie: ~6–10 spinlocked inner-node
|
||
fetches per `read(Keylet)`.
|
||
|
||
`FlatStateMap` materializes the same keylet → SLE mapping into a
|
||
flat hash table. Once populated, lookups are a single
|
||
`unordered_map::find()` — nanoseconds, not microseconds.
|
||
|
||
This is the 2-writes-for-1-read pattern (Plan 6):
|
||
* On apply, the apply path dual-writes to SHAMap and FlatStateMap.
|
||
* On read, only FlatStateMap is consulted.
|
||
|
||
The flat map is purely auxiliary. The SHAMap remains authoritative,
|
||
can always be rebuilt from the underlying NodeStore, and is what the
|
||
network's state-root commitment is computed from. If FlatStateMap is
|
||
wrong, the differential invariant check at ledger close (Plan 6 P6.5)
|
||
catches it; there is **no runtime fallback to SHAMap descent on
|
||
miss** — a miss is a bug.
|
||
|
||
Thread-safe via a shared_mutex. Concurrent reads do not block each
|
||
other; writes are exclusive. Future phases may replace this with
|
||
a lock-free atomic-pointer-swap design.
|
||
*/
|
||
class FlatStateMap
|
||
{
|
||
public:
|
||
using key_type = uint256;
|
||
using value_type = std::shared_ptr<STLedgerEntry const>;
|
||
|
||
FlatStateMap() = default;
|
||
~FlatStateMap() = default;
|
||
|
||
// Non-copyable, non-movable. The map owns a shared_mutex (not movable)
|
||
// and a potentially large hash table; callers that need ownership
|
||
// transfer should wrap in std::unique_ptr<FlatStateMap>.
|
||
FlatStateMap(FlatStateMap const&) = delete;
|
||
FlatStateMap&
|
||
operator=(FlatStateMap const&) = delete;
|
||
FlatStateMap(FlatStateMap&&) = delete;
|
||
FlatStateMap&
|
||
operator=(FlatStateMap&&) = delete;
|
||
|
||
/** Look up an SLE by its SHAMap key.
|
||
|
||
Returns nullptr if the key is not in the map. In Plan 6's pure
|
||
2w/1r model, callers reading state that *should* exist treat
|
||
nullptr as a precondition violation — there is no fallback path
|
||
that would recover from a missed entry.
|
||
*/
|
||
[[nodiscard]] value_type
|
||
read(key_type const& key) const;
|
||
|
||
/** Test whether an SLE is present. O(1). */
|
||
[[nodiscard]] bool
|
||
exists(key_type const& key) const;
|
||
|
||
/** Insert a new SLE. Replaces any prior entry under the same key.
|
||
|
||
Used by both:
|
||
* the apply path's `view.insert()` (new ledger object), and
|
||
* the apply path's `view.update()` (mutating an existing SLE
|
||
produces a new shared_ptr value).
|
||
*/
|
||
void
|
||
insert(key_type const& key, value_type sle);
|
||
|
||
/** Remove an SLE. No-op if the key is absent. */
|
||
void
|
||
erase(key_type const& key);
|
||
|
||
/** Number of SLEs currently materialized. */
|
||
[[nodiscard]] std::size_t
|
||
size() const;
|
||
|
||
/** Test whether the map is empty. O(1). */
|
||
[[nodiscard]] bool
|
||
empty() const;
|
||
|
||
/** Remove every entry. Used by tests and by snapshot reset. */
|
||
void
|
||
clear();
|
||
|
||
/** Deep-copy snapshot of the current state.
|
||
|
||
The snapshot is a frozen FlatStateMap (returned by value-like
|
||
unique_ptr) that shares the underlying SLE objects via
|
||
shared_ptr but has its own hash table. Subsequent writes to the
|
||
source FlatStateMap do not affect the snapshot.
|
||
|
||
Snapshot cost is O(N) in entry count — ~one pointer copy per
|
||
entry plus bucket allocation. For current mainnet (~10M SLEs)
|
||
this is ~100–200 ms; expensive enough that snapshots should be
|
||
per-ledger-close, not per-transaction.
|
||
|
||
A future phase will replace this with a persistent / HAMT
|
||
structure that gives O(log N) snapshot and structural sharing
|
||
across versions.
|
||
*/
|
||
[[nodiscard]] std::unique_ptr<FlatStateMap>
|
||
snapshot() const;
|
||
|
||
/** Visit every (key, SLE) pair under a single shared lock.
|
||
|
||
@param visitor Called as `void(key_type const&, value_type const&)`.
|
||
|
||
The lock is held for the duration of the iteration; visitors
|
||
must not call back into the same FlatStateMap (deadlock /
|
||
recursive shared_lock UB). Visitors that want to mutate state
|
||
should collect keys first and apply mutations after iteration
|
||
returns.
|
||
*/
|
||
template <typename F>
|
||
void
|
||
forEach(F&& visitor) const
|
||
{
|
||
std::shared_lock<std::shared_mutex> lock(mutex_);
|
||
for (auto const& [key, sle] : map_)
|
||
visitor(key, sle);
|
||
}
|
||
|
||
private:
|
||
using HashFn = HardenedHash<>;
|
||
using MapType = std::unordered_map<key_type, value_type, HashFn>;
|
||
|
||
mutable std::shared_mutex mutex_;
|
||
MapType map_;
|
||
};
|
||
|
||
// Forward declarations to avoid pulling heavy headers into this file.
|
||
class ReadView;
|
||
class Ledger;
|
||
|
||
/** Populate a FlatStateMap from every SLE in a ReadView.
|
||
|
||
Used at node startup to build the flat materialization from a
|
||
SHAMap-backed authoritative ledger. Cost is O(N) iterations of
|
||
`view.sles`, each of which descends the SHAMap, so this is
|
||
expected to take seconds-to-minutes for a current mainnet-sized
|
||
ledger (~10M SLEs). Run once on startup; subsequent ledgers are
|
||
maintained incrementally via dual-write at apply time (P6.3).
|
||
|
||
@pre `target` is empty. (Not enforced — replacing existing entries
|
||
is well-defined, but mixing populated state with an externally-
|
||
provided ReadView is a bug-shaped pattern; assert in DEBUG.)
|
||
*/
|
||
void
|
||
populateFromReadView(FlatStateMap& target, ReadView const& source);
|
||
|
||
/** Populate a FlatStateMap from any forward range of `shared_ptr<SLE const>`.
|
||
|
||
Lower-level building block underlying `populateFromReadView`. Useful
|
||
in tests (the range can be a `std::vector<shared_ptr<SLE const>>`)
|
||
and in non-ReadView contexts (e.g., reloading a persisted flat-map
|
||
sidecar at startup).
|
||
|
||
The range element type must be `shared_ptr<SLE const>` (or
|
||
implicitly convertible). Each element's `.key()` becomes the
|
||
FlatStateMap key.
|
||
*/
|
||
template <typename Range>
|
||
void
|
||
populateFromRange(FlatStateMap& target, Range const& sles)
|
||
{
|
||
for (auto const& sle : sles)
|
||
target.insert(sle->key(), sle);
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Mirror helpers (P6.3).
|
||
//
|
||
// The xrpld `RawView` interface defines three pure-virtual methods that
|
||
// every state mutation flows through: `rawInsert`, `rawReplace`, and
|
||
// `rawErase`. In plan-6's 2-writes-for-1-read pattern, every such call
|
||
// must also update the flat map. These helpers perform the flat-map side
|
||
// of that dual-write — they exist as standalone functions (rather than
|
||
// methods on FlatStateMap) so the Ledger integration is a one-line
|
||
// addition at each `raw*` override site, with no FlatStateMap class
|
||
// surface added for purely Ledger-specific semantics.
|
||
//
|
||
// Permissive semantics: `mirrorRawReplace` on an absent key inserts;
|
||
// `mirrorRawErase` on an absent key is a no-op. The SHAMap side enforces
|
||
// the precondition (replace requires existence); the flat mirror ensures
|
||
// the post-state matches whatever the SHAMap committed. If the caller
|
||
// gets it wrong, the differential invariant check at close (P6.5) is the
|
||
// stop-the-line gate.
|
||
// ---------------------------------------------------------------------------
|
||
|
||
void
|
||
mirrorRawInsert(FlatStateMap& map, std::shared_ptr<STLedgerEntry const> sle);
|
||
|
||
void
|
||
mirrorRawReplace(FlatStateMap& map, std::shared_ptr<STLedgerEntry const> sle);
|
||
|
||
void
|
||
mirrorRawErase(FlatStateMap& map, std::shared_ptr<STLedgerEntry const> const& sle);
|
||
|
||
void
|
||
mirrorRawErase(FlatStateMap& map, uint256 const& key);
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Keylet-aware read (P6.4).
|
||
//
|
||
// This is the read-side counterpart to `mirrorRaw*` — the testable unit
|
||
// underlying `Ledger::read(Keylet)`'s flat-map path. It looks up the
|
||
// SLE by `k.key` and verifies the SLE matches the keylet's expected
|
||
// type via `Keylet::check`. On either a miss or a type mismatch, it
|
||
// returns nullptr — matching the contract of `Ledger::read`.
|
||
//
|
||
// Plan 6 v2 semantics: this function does not consult a SHAMap or any
|
||
// other source on miss. When a FlatStateMap is the read source of
|
||
// truth, a miss IS the answer. The differential invariant check at
|
||
// close (P6.5) is what makes that safe.
|
||
// ---------------------------------------------------------------------------
|
||
|
||
std::shared_ptr<STLedgerEntry const>
|
||
readFromFlatStateMap(FlatStateMap const& map, Keylet const& k);
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Differential invariant (P6.5).
|
||
//
|
||
// At every ledger close, the flat map's key-set must match the
|
||
// SHAMap's key-set. `diffFlatStateKeys` produces both sides of the
|
||
// disagreement; `flatStateMapMatches` is the boolean predicate the
|
||
// hot-path integration calls (and fails the close if it returns false).
|
||
//
|
||
// Content drift (right keys, wrong SLE bodies) is a separate, stronger
|
||
// invariant. It's prevented by construction: the mirror helpers write
|
||
// exactly the SLE the caller passed to raw*. If mirror helpers and
|
||
// wiring are both correct, the membership check above is sufficient.
|
||
// ---------------------------------------------------------------------------
|
||
|
||
struct FlatStateKeyDiff
|
||
{
|
||
std::vector<uint256> missingFromFlat;
|
||
std::vector<uint256> extraInFlat;
|
||
};
|
||
|
||
template <typename SourceRange>
|
||
[[nodiscard]] FlatStateKeyDiff
|
||
diffFlatStateKeys(FlatStateMap const& flat, SourceRange const& sourceKeys)
|
||
{
|
||
FlatStateKeyDiff diff;
|
||
|
||
// Pass 1: walk source keys; collect any absent from flat. Record
|
||
// which keys we've seen so pass 2 can spot phantoms.
|
||
std::unordered_set<uint256, HardenedHash<>> seen;
|
||
seen.reserve(static_cast<std::size_t>(std::distance(
|
||
std::begin(sourceKeys), std::end(sourceKeys))));
|
||
|
||
for (auto const& key : sourceKeys)
|
||
{
|
||
seen.insert(key);
|
||
if (!flat.exists(key))
|
||
diff.missingFromFlat.push_back(key);
|
||
}
|
||
|
||
// Pass 2: walk flat; anything not in `seen` is a phantom.
|
||
flat.forEach(
|
||
[&seen, &diff](uint256 const& key, auto const& /*sle*/) {
|
||
if (!seen.contains(key))
|
||
diff.extraInFlat.push_back(key);
|
||
});
|
||
|
||
return diff;
|
||
}
|
||
|
||
template <typename SourceRange>
|
||
[[nodiscard]] bool
|
||
flatStateMapMatches(FlatStateMap const& flat, SourceRange const& sourceKeys)
|
||
{
|
||
auto const diff = diffFlatStateKeys(flat, sourceKeys);
|
||
return diff.missingFromFlat.empty() && diff.extraInFlat.empty();
|
||
}
|
||
|
||
/** Compare a FlatStateMap against a SHAMap-like source.
|
||
|
||
`ShaMapLike` is any range whose elements expose a `key()` accessor
|
||
returning a `uint256`. The real `SHAMap` satisfies this contract
|
||
(its iterators yield `SHAMapItem`s with `.key()`), as does the
|
||
`MockShaMapItem` used in tests.
|
||
|
||
This is the helper the Ledger integration calls to run the P6.5
|
||
differential invariant. It extracts keys into a transient buffer
|
||
(O(N) allocation, ~N pointers' worth of memory) and forwards to
|
||
`flatStateMapMatches`. The transient buffer is acceptable at close
|
||
cadence; the integration can later optimize by walking the SHAMap
|
||
in-place once profiling shows the allocation matters.
|
||
*/
|
||
template <typename ShaMapLike>
|
||
[[nodiscard]] bool
|
||
flatStateMapMatchesShaMap(FlatStateMap const& flat, ShaMapLike const& shaMap)
|
||
{
|
||
std::vector<uint256> keys;
|
||
for (auto const& item : shaMap)
|
||
keys.push_back(item.key());
|
||
return flatStateMapMatches(flat, keys);
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// A-phase integration: attach a populated FlatStateMap to a Ledger.
|
||
//
|
||
// This is the public entry point a node or test uses to "turn on" the
|
||
// flat-map read path for a Ledger. The function:
|
||
// 1. Allocates a new FlatStateMap.
|
||
// 2. Eagerly populates it by walking every SLE in the Ledger.
|
||
// 3. Attaches it via `Ledger::setFlatStateMap`.
|
||
//
|
||
// After this call:
|
||
// * `Ledger::flatStateMap()` returns the populated map
|
||
// * `Ledger::read(keylet)` routes through the flat map (no SHAMap
|
||
// descent on the hot path; see P6.4 wiring)
|
||
// * `Ledger::raw{Insert,Replace,Erase}` mirror writes to the flat
|
||
// map alongside the SHAMap (see P6.3 wiring)
|
||
// * `Ledger::validateFlatStateMapMatchesShaMap()` returns true
|
||
//
|
||
// Repeat calls discard the prior map and produce a fresh one.
|
||
//
|
||
// Cost: O(N) walk over the Ledger's state SHAMap to populate the flat
|
||
// map. For mainnet-scale state, this is bounded by SHAMap traversal
|
||
// speed — typically minutes once. Run at node startup or whenever a
|
||
// Ledger first becomes "live" (the one apply writes to).
|
||
// ---------------------------------------------------------------------------
|
||
|
||
void
|
||
attachFlatStateMapTo(Ledger& ledger);
|
||
|
||
} // namespace xrpl
|