Files
rippled/src/libxrpl/shamap/SHAMapSync.cpp
Jingchen 8eb233c2ea refactor: Modularize shamap and nodestore (#5668)
This change moves the shamap and nodestore from `xrpld` to `libxrpl`.
2025-10-31 22:25:16 +00:00

907 lines
28 KiB
C++

//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2012, 2013 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <xrpl/basics/random.h>
#include <xrpl/shamap/SHAMap.h>
#include <xrpl/shamap/SHAMapLeafNode.h>
#include <xrpl/shamap/SHAMapSyncFilter.h>
namespace ripple {
void
SHAMap::visitLeaves(
std::function<void(boost::intrusive_ptr<SHAMapItem const> const&
item)> const& leafFunction) const
{
visitNodes([&leafFunction](SHAMapTreeNode& node) {
if (!node.isInner())
leafFunction(static_cast<SHAMapLeafNode&>(node).peekItem());
return true;
});
}
/** Visit every node in the map, depth first: the root, then each child
    (and everything beneath it) before that child's later siblings.

    @param function Called once per visited node. Returning `false` ends
                    the traversal immediately; this applies to the call
                    made for the root node as well.
*/
void
SHAMap::visitNodes(std::function<bool(SHAMapTreeNode&)> const& function) const
{
    if (!root_)
        return;

    // The callback's verdict counts for the root just like any other node.
    if (!function(*root_))
        return;

    if (!root_->isInner())
        return;

    // Each entry pairs an inner node with the branch at which its scan
    // must resume once the subtree currently being walked is finished.
    using StackEntry = std::pair<int, intr_ptr::SharedPtr<SHAMapInnerNode>>;
    std::stack<StackEntry, std::vector<StackEntry>> stack;

    auto node = intr_ptr::static_pointer_cast<SHAMapInnerNode>(root_);
    int pos = 0;

    while (true)
    {
        while (pos < 16)
        {
            if (node->isEmptyBranch(pos))
            {
                ++pos;  // move to next position
                continue;
            }

            intr_ptr::SharedPtr<SHAMapTreeNode> child =
                descendNoStore(*node, pos);
            if (!function(*child))
                return;

            if (child->isLeaf())
            {
                ++pos;
                continue;
            }

            // If there are no more children, don't push this node
            while ((pos != 15) && (node->isEmptyBranch(pos + 1)))
                ++pos;

            if (pos != 15)
            {
                // save next position to resume at
                stack.push(std::make_pair(pos + 1, std::move(node)));
            }

            // descend to the child's first position
            node = intr_ptr::static_pointer_cast<SHAMapInnerNode>(child);
            pos = 0;
        }

        if (stack.empty())
            break;

        // The entry is popped right away, so take its node pointer instead
        // of copying it.
        std::tie(pos, node) = std::move(stack.top());
        stack.pop();
    }
}
/** Visit every node of this map that the map `have` does not contain.

    @param have     Map to compare against; may be null, in which case
                    every node of this map is visited.
    @param function Called for each node found to be absent from `have`.
                    Returning `false` from a call made inside the main
                    loop stops the traversal.
*/
void
SHAMap::visitDifferences(
    SHAMap const* have,
    std::function<bool(SHAMapTreeNode const&)> const& function) const
{
    // Nothing to report for an absent or zero-hash root
    if (!root_ || root_->getHash().isZero())
        return;

    // Identical root hashes: nothing to report
    if (have && (root_->getHash() == have->root_->getHash()))
        return;

    if (root_->isLeaf())
    {
        // The whole map is one leaf; report it unless `have` holds it too
        auto const rootLeaf =
            intr_ptr::static_pointer_cast<SHAMapLeafNode>(root_);
        bool const alreadyHeld = have &&
            have->hasLeafNode(rootLeaf->peekItem()->key(), rootLeaf->getHash());
        if (!alreadyHeld)
            function(*root_);
        return;
    }

    // Inner nodes known to differ whose children are still unexamined
    using Pending = std::pair<SHAMapInnerNode*, SHAMapNodeID>;
    std::stack<Pending, std::vector<Pending>> pending;
    pending.push({static_cast<SHAMapInnerNode*>(root_.get()), SHAMapNodeID{}});

    while (!pending.empty())
    {
        // Copy the entry out before popping it
        auto const [inner, innerID] = pending.top();
        pending.pop();

        // Report the inner node itself first
        if (!function(*inner))
            return;

        // Then examine each populated branch
        for (int branch = 0; branch < 16; ++branch)
        {
            if (inner->isEmptyBranch(branch))
                continue;

            auto const& childHash = inner->getChildHash(branch);
            SHAMapNodeID childID = innerID.getChildNodeID(branch);
            auto child = descendThrow(inner, branch);

            if (child->isInner())
            {
                // Differing inner nodes are queued and reported when popped
                bool const known =
                    have && have->hasInnerNode(childID, childHash);
                if (!known)
                    pending.push(
                        {static_cast<SHAMapInnerNode*>(child), childID});
                continue;
            }

            // Leaves are reported immediately
            bool const known = have &&
                have->hasLeafNode(
                    static_cast<SHAMapLeafNode*>(child)->peekItem()->key(),
                    childHash);
            if (!known && !function(*child))
                return;
        }
    }
}
// Starting at the position referred to by the specified
// StackEntry, process that node and its first resident
// children, descending the SHAMap until we complete the
// processing of a node.
//
// `se` is unpacked as (inner node, its node ID, first-branch offset,
// count of branches already scanned, full-below flag) and is updated in
// place. On return, `node` is null when an inner node was processed to
// completion, and is left non-null when the function stopped early because
// the missing-node budget (mn.max_) ran out.
void
SHAMap::gmn_ProcessNodes(MissingNodes& mn, MissingNodes::StackEntry& se)
{
// All five locals alias elements of `se`, so every assignment below
// rewrites the caller's entry.
SHAMapInnerNode*& node = std::get<0>(se);
SHAMapNodeID& nodeID = std::get<1>(se);
int& firstChild = std::get<2>(se);
int& currentChild = std::get<3>(se);
bool& fullBelow = std::get<4>(se);
while (currentChild < 16)
{
// Scan the 16 branches starting at the firstChild offset and wrapping
// around. currentChild is advanced here, so an entry saved part-way
// through resumes at the following branch.
int branch = (firstChild + currentChild++) % 16;
if (node->isEmptyBranch(branch))
continue;
auto const& childHash = node->getChildHash(branch);
if (mn.missingHashes_.count(childHash) != 0)
{
// we already know this child node is missing
fullBelow = false;
}
// The full-below cache is only consulted for backed maps. A child found
// in it is skipped without touching fullBelow.
else if (
!backed_ ||
!f_.getFullBelowCache()->touch_if_exists(childHash.as_uint256()))
{
// `pending` is evidently an out-parameter of descendAsync: it is tested
// right after the call to tell a deferred read from a missing node.
bool pending = false;
auto d = descendAsync(
node,
branch,
mn.filter_,
pending,
// node, nodeID and branch are captured by value because the aliases
// above may be re-pointed at a child before the read completes.
[node, nodeID, branch, &mn](
intr_ptr::SharedPtr<SHAMapTreeNode> found,
SHAMapHash const&) {
// a read completed asynchronously
// finishedReads_ is consumed by gmn_ProcessDeferredReads under the
// same lock.
std::unique_lock<std::mutex> lock{mn.deferLock_};
mn.finishedReads_.emplace_back(
node, nodeID, branch, std::move(found));
mn.deferCondVar_.notify_one();
});
if (pending)
{
// Result not known yet; it is accounted for once the read finishes.
fullBelow = false;
++mn.deferred_;
}
else if (!d)
{
// node is not in database
fullBelow = false; // for now, not known full below
mn.missingHashes_.insert(childHash);
mn.missingNodes_.emplace_back(
nodeID.getChildNodeID(branch), childHash.as_uint256());
// Budget exhausted: return without clearing `node`, so the caller
// does not treat this node as finished.
if (--mn.max_ <= 0)
return;
}
else if (
d->isInner() &&
!static_cast<SHAMapInnerNode*>(d)->isFullBelow(mn.generation_))
{
// Save the current state so this node can be resumed later. The
// assignments that follow then overwrite `se` through the aliases.
mn.stack_.push(se);
// Switch to processing the child node
node = static_cast<SHAMapInnerNode*>(d);
nodeID = nodeID.getChildNodeID(branch);
// Any value works as the offset because the branch index is taken
// modulo 16 above.
firstChild = rand_int(255);
currentChild = 0;
fullBelow = true;
}
}
}
// We have finished processing an inner node
// and thus (for now) all its children
if (fullBelow)
{ // No partial node encountered below this node
node->setFullBelowGen(mn.generation_);
if (backed_)
{
f_.getFullBelowCache()->insert(node->getHash().as_uint256());
}
}
// Tells the caller this node is done (getMissingNodes tests for null).
node = nullptr;
}
// Wait for deferred reads to finish and
// process their results
//
// Blocks until one finished read has been collected for every read counted
// in mn.deferred_. A read that produced a node is hooked into its parent,
// and the parent is remembered in mn.resumes_ so it can be revisited. A
// read that produced nothing is recorded as a missing node while the
// budget (mn.max_) allows. The deferred-read bookkeeping is reset before
// returning.
void
SHAMap::gmn_ProcessDeferredReads(MissingNodes& mn)
{
    // (parent inner node, parent ID, branch, node that was read or null)
    using FinishedRead = std::tuple<
        SHAMapInnerNode*,
        SHAMapNodeID,
        int,
        intr_ptr::SharedPtr<SHAMapTreeNode>>;

    // Process all deferred reads
    int complete = 0;
    while (complete != mn.deferred_)
    {
        FinishedRead finished;
        {
            // Completion callbacks append to finishedReads_ under this lock.
            std::unique_lock<std::mutex> lock{mn.deferLock_};
            mn.deferCondVar_.wait(lock, [&mn, complete] {
                return mn.finishedReads_.size() >
                    static_cast<std::size_t>(complete);
            });
            finished = std::move(mn.finishedReads_[complete++]);
        }

        auto const parent = std::get<0>(finished);
        auto const& parentID = std::get<1>(finished);
        auto const branch = std::get<2>(finished);
        // Bind by reference: the pointer is handed on below, so copying it
        // out of the tuple would only add a refcount round trip.
        auto& nodePtr = std::get<3>(finished);
        auto const& nodeHash = parent->getChildHash(branch);

        if (nodePtr)
        {  // Got the node
            nodePtr = parent->canonicalizeChild(branch, std::move(nodePtr));

            // When we finish this stack, we need to restart
            // with the parent of this node
            mn.resumes_[parent] = parentID;
        }
        else if ((mn.max_ > 0) && (mn.missingHashes_.insert(nodeHash).second))
        {
            // First time this hash is seen missing, and budget remains
            mn.missingNodes_.emplace_back(
                parentID.getChildNodeID(branch), nodeHash.as_uint256());
            --mn.max_;
        }
    }

    mn.finishedReads_.clear();
    mn.finishedReads_.reserve(mn.maxDefer_);
    mn.deferred_ = 0;
}
/** Get a list of node IDs and hashes for nodes that are part of this SHAMap
but not available locally. The filter can hold alternate sources of
nodes that are not permanently stored locally

@param max    Upper bound on the number of missing nodes to report; must
be positive. The search stops once this budget is used up.
@param filter Passed through to the node lookups; may supply nodes that
are not stored locally.
@return (node ID, hash) pairs for the missing nodes found. When the result
is empty because nothing is missing, clearSynching() has been called.
*/
std::vector<std::pair<SHAMapNodeID, uint256>>
SHAMap::getMissingNodes(int max, SHAMapSyncFilter* filter)
{
XRPL_ASSERT(
root_->getHash().isNonZero(),
"ripple::SHAMap::getMissingNodes : nonzero root hash");
XRPL_ASSERT(max > 0, "ripple::SHAMap::getMissingNodes : valid max input");
MissingNodes mn(
max,
filter,
512, // number of async reads per pass
f_.getFullBelowCache()->getGeneration());
// A non-inner root, or a root already marked full below for this
// generation, leaves nothing to search.
if (!root_->isInner() ||
intr_ptr::static_pointer_cast<SHAMapInnerNode>(root_)->isFullBelow(
mn.generation_))
{
clearSynching();
// missingNodes_ is a member of a local, so it must be moved explicitly
// to avoid a copy.
return std::move(mn.missingNodes_);
}
// Start at the root.
// The firstChild value is selected randomly so if multiple threads
// are traversing the map, each thread will start at a different
// (randomly selected) inner node. This increases the likelihood
// that the two threads will produce different request sets (which is
// more efficient than sending identical requests).
// Tuple layout, as unpacked by gmn_ProcessNodes: (node, node ID,
// first-branch offset, branches scanned so far, full-below flag).
MissingNodes::StackEntry pos{
static_cast<SHAMapInnerNode*>(root_.get()),
SHAMapNodeID(),
rand_int(255),
0,
true};
// These alias elements of `pos`, so they follow every `pos = ...`
// assignment made below.
auto& node = std::get<0>(pos);
auto& nextChild = std::get<3>(pos);
auto& fullBelow = std::get<4>(pos);
// Traverse the map without blocking
do
{
// Keep walking until there is no current node or more reads have been
// deferred than maxDefer_ allows.
while ((node != nullptr) && (mn.deferred_ <= mn.maxDefer_))
{
gmn_ProcessNodes(mn, pos);
// Budget used up: gmn_ProcessNodes returned early.
if (mn.max_ <= 0)
break;
if ((node == nullptr) && !mn.stack_.empty())
{
// Pick up where we left off with this node's parent
bool was = fullBelow; // was full below
pos = mn.stack_.top();
mn.stack_.pop();
if (nextChild == 0)
{
// This is a node we are processing for the first time
fullBelow = true;
}
else
{
// This is a node we are continuing to process
fullBelow = fullBelow && was; // was and still is
}
XRPL_ASSERT(
node,
"ripple::SHAMap::getMissingNodes : first non-null node");
}
}
// We have either emptied the stack or
// posted as many deferred reads as we can
if (mn.deferred_)
gmn_ProcessDeferredReads(mn);
// The deferred reads are drained above before returning, so no
// completion callback is still outstanding when `mn` goes away.
if (mn.max_ <= 0)
return std::move(mn.missingNodes_);
if (node == nullptr)
{ // We weren't in the middle of processing a node
if (mn.stack_.empty() && !mn.resumes_.empty())
{
// Recheck nodes we could not finish before
// Nodes that have since become full below are dropped.
for (auto const& [innerNode, nodeId] : mn.resumes_)
if (!innerNode->isFullBelow(mn.generation_))
mn.stack_.push(std::make_tuple(
innerNode, nodeId, rand_int(255), 0, true));
mn.resumes_.clear();
}
if (!mn.stack_.empty())
{
// Resume at the top of the stack
pos = mn.stack_.top();
mn.stack_.pop();
XRPL_ASSERT(
node,
"ripple::SHAMap::getMissingNodes : second non-null node");
}
}
// node will only still be nullptr if
// we finished the current node, the stack is empty
// and we have no nodes to resume
} while (node != nullptr);
// Nothing missing anywhere: the map no longer needs synching.
if (mn.missingNodes_.empty())
clearSynching();
return std::move(mn.missingNodes_);
}
/** Serialize a node, plus some of the nodes beneath it, for a peer.

    @param wanted    ID of the node at which the reply starts.
    @param data      Receives one (node ID, wire-format blob) pair per node
                     included; entries are appended.
    @param fatLeaves If true, leaf children of the inner nodes processed
                     are included as well.
    @param depth     Number of further levels to descend. Inner nodes with
                     exactly one child are followed without reducing it.
    @return `false` if `wanted` is not in the map or names an empty inner
            node; `true` otherwise.
*/
bool
SHAMap::getNodeFat(
    SHAMapNodeID const& wanted,
    std::vector<std::pair<SHAMapNodeID, Blob>>& data,
    bool fatLeaves,
    std::uint32_t depth) const
{
    // Walk from the root towards the requested node
    auto node = root_.get();
    SHAMapNodeID nodeID;

    while (node && node->isInner() && (nodeID.getDepth() < wanted.getDepth()))
    {
        int branch = selectBranch(nodeID, wanted.getNodeID());
        auto inner = static_cast<SHAMapInnerNode*>(node);
        if (inner->isEmptyBranch(branch))
            return false;
        node = descendThrow(inner, branch);
        nodeID = nodeID.getChildNodeID(branch);
    }

    if (node == nullptr || wanted != nodeID)
    {
        JLOG(journal_.info())
            << "peer requested node that is not in the map: " << wanted
            << " but found " << nodeID;
        return false;
    }

    if (node->isInner() && static_cast<SHAMapInnerNode*>(node)->isEmpty())
    {
        JLOG(journal_.warn()) << "peer requests empty node";
        return false;
    }

    // Nodes still to be emitted, each with the depth budget left for it.
    // The budget keeps the parameter's unsigned type so it never passes
    // through a signed conversion.
    std::stack<std::tuple<SHAMapTreeNode*, SHAMapNodeID, std::uint32_t>> stack;
    stack.emplace(node, nodeID, depth);

    Serializer s(8192);

    while (!stack.empty())
    {
        std::tie(node, nodeID, depth) = stack.top();
        stack.pop();

        // Add this node to the reply
        s.erase();
        node->serializeForWire(s);
        data.emplace_back(nodeID, s.getData());

        if (!node->isInner())
            continue;

        // We descend inner nodes with only a single child
        // without decrementing the depth
        auto inner = static_cast<SHAMapInnerNode*>(node);
        int bc = inner->getBranchCount();

        if ((depth == 0) && (bc != 1))
            continue;

        // We need to process this node's children
        for (int i = 0; i < 16; ++i)
        {
            if (inner->isEmptyBranch(i))
                continue;

            auto const childNode = descendThrow(inner, i);
            auto const childID = nodeID.getChildNodeID(i);

            if (childNode->isInner() && ((depth > 1) || (bc == 1)))
            {
                // If there's more than one child, reduce the depth
                // If only one child, follow the chain
                stack.emplace(
                    childNode, childID, (bc > 1) ? (depth - 1) : depth);
            }
            else if (childNode->isInner() || fatLeaves)
            {
                // Just include this node
                s.erase();
                childNode->serializeForWire(s);
                data.emplace_back(childID, s.getData());
            }
        }
    }

    return true;
}
/** Append the wire-format serialization of the root node to `s`.

    @param s Serializer that receives the root node's data.
*/
void
SHAMap::serializeRoot(Serializer& s) const
{
    auto& rootNode = *root_;
    rootNode.serializeForWire(s);
}
/** Install a root node received in wire format.

    @param hash     Hash the root node is expected to have.
    @param rootNode Wire-format data for the node.
    @param filter   Optional; if set, it is told about the accepted node.
    @return duplicate() if the map already has a non-zero root, invalid()
            if the data cannot be parsed or does not hash to `hash`,
            useful() once the node is installed.
*/
SHAMapAddNode
SHAMap::addRootNode(
    SHAMapHash const& hash,
    Slice const& rootNode,
    SHAMapSyncFilter* filter)
{
    // A non-zero root hash means a root is already in place
    bool const haveRoot = root_->getHash().isNonZero();
    if (haveRoot)
    {
        JLOG(journal_.trace()) << "got root node, already have one";
        XRPL_ASSERT(
            root_->getHash() == hash,
            "ripple::SHAMap::addRootNode : valid hash input");
        return SHAMapAddNode::duplicate();
    }

    XRPL_ASSERT(cowid_ >= 1, "ripple::SHAMap::addRootNode : valid cowid");

    auto node = SHAMapTreeNode::makeFromWire(rootNode);
    if (!node)
        return SHAMapAddNode::invalid();
    if (node->getHash() != hash)
        return SHAMapAddNode::invalid();

    if (backed_)
        canonicalize(hash, node);

    root_ = node;

    // A map whose root is a leaf has nothing further to fetch
    if (root_->isLeaf())
        clearSynching();

    if (!filter)
        return SHAMapAddNode::useful();

    // Hand the prefixed serialization of the new root to the filter
    Serializer s;
    root_->serializeWithPrefix(s);
    filter->gotNode(
        false,
        root_->getHash(),
        ledgerSeq_,
        std::move(s.modData()),
        root_->getType());

    return SHAMapAddNode::useful();
}
/** Add a non-root node, received in wire format, at a given position.

Walks from the root towards `node`. At the first child on that path that
is not available locally, `rawNode` is parsed, validated and hooked in.

@param node    Position the data is claimed to belong at; must not be
the root.
@param rawNode Wire-format data for the node.
@param filter  Optional; used during descent and told about the node once
it is accepted.
@return duplicate() if the map is not synching or the position is already
covered, invalid() if the data is rejected, useful() otherwise. Note that
useful() is also returned when the map is marked invalid and when the
node cannot be hooked at the position reached.
*/
SHAMapAddNode
SHAMap::addKnownNode(
SHAMapNodeID const& node,
Slice const& rawNode,
SHAMapSyncFilter* filter)
{
XRPL_ASSERT(
!node.isRoot(), "ripple::SHAMap::addKnownNode : valid node input");
if (!isSynching())
{
JLOG(journal_.trace()) << "AddKnownNode while not synching";
return SHAMapAddNode::duplicate();
}
auto const generation = f_.getFullBelowCache()->getGeneration();
SHAMapNodeID currNodeID;
auto currNode = root_.get();
// Descend while the current node is an inner node that is not yet full
// below and lies above the target depth.
while (currNode->isInner() &&
!static_cast<SHAMapInnerNode*>(currNode)->isFullBelow(generation) &&
(currNodeID.getDepth() < node.getDepth()))
{
int const branch = selectBranch(currNodeID, node.getNodeID());
XRPL_ASSERT(branch >= 0, "ripple::SHAMap::addKnownNode : valid branch");
auto inner = static_cast<SHAMapInnerNode*>(currNode);
if (inner->isEmptyBranch(branch))
{
// NOTE(review): the message has no trailing separator before the node
// ID is streamed - confirm the log output reads as intended.
JLOG(journal_.warn()) << "Add known node for empty branch" << node;
return SHAMapAddNode::invalid();
}
auto childHash = inner->getChildHash(branch);
// A child found in the full-below cache needs no new data.
if (f_.getFullBelowCache()->touch_if_exists(childHash.as_uint256()))
{
return SHAMapAddNode::duplicate();
}
auto prevNode = inner;
// After this call currNodeID holds whatever ID descend() returned
// (presumably the child's ID, even when no node was found - confirm).
std::tie(currNode, currNodeID) =
descend(inner, currNodeID, branch, filter);
// Child already available: keep walking.
if (currNode != nullptr)
continue;
// First unavailable child on the path: try to fill it from rawNode.
auto newNode = SHAMapTreeNode::makeFromWire(rawNode);
if (!newNode || childHash != newNode->getHash())
{
JLOG(journal_.warn()) << "Corrupt node received";
return SHAMapAddNode::invalid();
}
// In rare cases, a node can still be corrupt even after hash
// validation. For leaf nodes, we perform an additional check to
// ensure the node's position in the tree is consistent with its
// content to prevent inconsistencies that could
// propagate further down the line.
if (newNode->isLeaf())
{
auto const& actualKey =
static_cast<SHAMapLeafNode const*>(newNode.get())
->peekItem()
->key();
// Validate that this leaf belongs at the target position
auto const expectedNodeID =
SHAMapNodeID::createID(node.getDepth(), actualKey);
if (expectedNodeID.getNodeID() != node.getNodeID())
{
JLOG(journal_.debug())
<< "Leaf node position mismatch: "
<< "expected=" << expectedNodeID.getNodeID()
<< ", actual=" << node.getNodeID();
return SHAMapAddNode::invalid();
}
}
// Inner nodes must be at a level strictly less than 64
// but leaf nodes (while notionally at level 64) can be
// at any depth up to and including 64:
if ((currNodeID.getDepth() > leafDepth) ||
(newNode->isInner() && currNodeID.getDepth() == leafDepth))
{
// Map is provably invalid
state_ = SHAMapState::Invalid;
return SHAMapAddNode::useful();
}
// The gap found on the path is not at the position the caller named.
if (currNodeID != node)
{
// Either this node is broken or we didn't request it (yet)
JLOG(journal_.warn()) << "unable to hook node " << node;
JLOG(journal_.info()) << " stuck at " << currNodeID;
JLOG(journal_.info()) << "got depth=" << node.getDepth()
<< ", walked to= " << currNodeID.getDepth();
return SHAMapAddNode::useful();
}
if (backed_)
canonicalize(childHash, newNode);
// Hook the node into its parent. canonicalizeChild returns the node that
// is kept from here on.
newNode = prevNode->canonicalizeChild(branch, std::move(newNode));
if (filter)
{
Serializer s;
newNode->serializeWithPrefix(s);
filter->gotNode(
false,
childHash,
ledgerSeq_,
std::move(s.modData()),
newNode->getType());
}
return SHAMapAddNode::useful();
}
// The walk ended without finding a gap on the path.
JLOG(journal_.trace()) << "got node, already had it (late)";
return SHAMapAddNode::duplicate();
}
/** Compare this map with another, node by node.

    Intended for debug/test only.

    @param other Map to compare against.
    @return `true` if every pair of corresponding nodes matches in hash and
            kind, and every pair of leaves matches in key and data.
*/
bool
SHAMap::deepCompare(SHAMap& other) const
{
    // Pairs of corresponding nodes still waiting to be compared
    std::stack<std::pair<SHAMapTreeNode*, SHAMapTreeNode*>> pending;
    pending.push({root_.get(), other.root_.get()});

    while (!pending.empty())
    {
        auto const [mine, theirs] = pending.top();
        pending.pop();

        if (!mine || !theirs)
        {
            JLOG(journal_.info()) << "unable to fetch node";
            return false;
        }

        if (theirs->getHash() != mine->getHash())
        {
            JLOG(journal_.warn()) << "node hash mismatch";
            return false;
        }

        if (mine->isLeaf())
        {
            if (!theirs->isLeaf())
                return false;

            auto const& myItem =
                static_cast<SHAMapLeafNode*>(mine)->peekItem();
            auto const& theirItem =
                static_cast<SHAMapLeafNode*>(theirs)->peekItem();

            if (myItem->key() != theirItem->key())
                return false;
            if (myItem->slice() != theirItem->slice())
                return false;
            continue;
        }

        if (!mine->isInner())
            continue;

        if (!theirs->isInner())
            return false;

        auto const myInner = static_cast<SHAMapInnerNode*>(mine);
        auto const theirInner = static_cast<SHAMapInnerNode*>(theirs);

        for (int branch = 0; branch < 16; ++branch)
        {
            bool const mineEmpty = myInner->isEmptyBranch(branch);
            bool const theirsEmpty = theirInner->isEmptyBranch(branch);

            // A branch must be populated in both maps or in neither
            if (mineEmpty != theirsEmpty)
                return false;
            if (mineEmpty)
                continue;

            auto const myChild = descend(myInner, branch);
            auto const theirChild = other.descend(theirInner, branch);
            if (!myChild || !theirChild)
            {
                JLOG(journal_.warn()) << "unable to fetch inner node";
                return false;
            }
            pending.push({myChild, theirChild});
        }
    }

    return true;
}
/** Does this map have this inner node?

    @param targetNodeID   Position at which the inner node is expected.
    @param targetNodeHash Hash the inner node is expected to have.
    @return `true` if the node reached by walking towards `targetNodeID` is
            an inner node with hash `targetNodeHash`.
*/
bool
SHAMap::hasInnerNode(
    SHAMapNodeID const& targetNodeID,
    SHAMapHash const& targetNodeHash) const
{
    auto cursor = root_.get();
    SHAMapNodeID cursorID;

    for (;;)
    {
        // Stop at a leaf or once the target depth has been reached
        if (!cursor->isInner())
            break;
        if (!(cursorID.getDepth() < targetNodeID.getDepth()))
            break;

        int const branch = selectBranch(cursorID, targetNodeID.getNodeID());
        auto const inner = static_cast<SHAMapInnerNode*>(cursor);
        if (inner->isEmptyBranch(branch))
            return false;

        cursor = descendThrow(inner, branch);
        cursorID = cursorID.getChildNodeID(branch);
    }

    if (!cursor->isInner())
        return false;
    return cursor->getHash() == targetNodeHash;
}
/** Does this map have this leaf node?

    @param tag            Key used to choose the branch at each level.
    @param targetNodeHash Hash the leaf is expected to have.
    @return `true` if a child hash on the path selected by `tag` equals
            `targetNodeHash`, or if the root is not an inner node and its
            own hash matches.
*/
bool
SHAMap::hasLeafNode(uint256 const& tag, SHAMapHash const& targetNodeHash) const
{
    auto cursor = root_.get();

    // A non-inner root is the only node in the tree
    if (!cursor->isInner())
        return cursor->getHash() == targetNodeHash;

    SHAMapNodeID cursorID;
    while (cursor->isInner())
    {
        int const branch = selectBranch(cursorID, tag);
        auto const inner = static_cast<SHAMapInnerNode*>(cursor);

        // Dead end, node must not be here
        if (inner->isEmptyBranch(branch))
            return false;

        // A matching child hash settles it without fetching the child
        if (inner->getChildHash(branch) == targetNodeHash)
            return true;

        cursor = descendThrow(inner, branch);
        cursorID = cursorID.getChildNodeID(branch);
    }

    // A matching leaf would already have been caught by the child-hash
    // check above
    return false;
}
/** Build the list of serialized nodes on the walk to the leaf for `key`.

    @param key Key of the item whose path is wanted.
    @return The wire-format blobs of the nodes collected by
            walkTowardsKey(), taken from the top of the stack downwards;
            an empty optional if the walk does not end at a leaf holding
            `key`.
*/
std::optional<std::vector<Blob>>
SHAMap::getProofPath(uint256 const& key) const
{
    SharedPtrNodeStack stack;
    walkTowardsKey(key, &stack);

    if (stack.empty())
    {
        JLOG(journal_.debug()) << "no path to " << key;
        return {};
    }

    // The node on top of the stack must be a leaf carrying exactly this key
    auto const& last = stack.top().first;
    bool const notOurLeaf = !last || last->isInner() ||
        intr_ptr::static_pointer_cast<SHAMapLeafNode>(last)
                ->peekItem()
                ->key() != key;
    if (notOurLeaf)
    {
        JLOG(journal_.debug()) << "no path to " << key;
        return {};
    }

    std::vector<Blob> path;
    path.reserve(stack.size());

    // Drain the stack, serializing each node as it comes off the top
    for (; !stack.empty(); stack.pop())
    {
        Serializer s;
        stack.top().first->serializeForWire(s);
        path.emplace_back(std::move(s.modData()));
    }

    JLOG(journal_.debug()) << "getPath for key " << key << ", path length "
                           << path.size();
    return path;
}
/** Check that a list of serialized nodes proves `key` under `rootHash`.

    The blobs are read from the back of `path` to the front. Each must hash
    to the value expected at its level, starting from `rootHash`; an inner
    node supplies the expected hash for the next blob.

    @param rootHash Hash that the first node read must have.
    @param key      Key that selects the branch taken at each inner node.
    @param path     Wire-format nodes; must hold between 1 and 65 blobs.
    @return `true` only if every blob checks out and the first non-inner
            node is the final blob read.
*/
bool
SHAMap::verifyProofPath(
    uint256 const& rootHash,
    uint256 const& key,
    std::vector<Blob> const& path)
{
    if (path.empty() || path.size() > 65)
        return false;

    SHAMapHash hash{rootHash};
    try
    {
        // Number of blobs already consumed, i.e. the level of the current
        // node
        std::ptrdiff_t depth = 0;
        for (auto rit = path.rbegin(); rit != path.rend(); ++rit, ++depth)
        {
            auto node = SHAMapTreeNode::makeFromWire(makeSlice(*rit));
            if (!node)
                return false;

            node->updateHash();
            if (node->getHash() != hash)
                return false;

            if (!node->isInner())
            {
                // should exhaust all the blobs now
                return depth + 1 == path.size();
            }

            // Follow the branch that `key` selects at this level
            auto nodeId = SHAMapNodeID::createID(depth, key);
            hash = static_cast<SHAMapInnerNode*>(node.get())
                       ->getChildHash(selectBranch(nodeId, key));
        }
    }
    catch (std::exception const&)
    {
        // the data in the path may come from the network,
        // exception could be thrown when parsing the data
        return false;
    }

    // Every blob was an inner node, so no leaf was reached
    return false;
}
} // namespace ripple