Reporting Mode: Do not attempt to acquire missing data from peer network (#4458)

In Reporting Mode, a server would core dump when it was unable to read
from Cassandra. This patch prevents that core dump on reporting mode
servers when Cassandra is down. It does not fix the root cause, but it
cuts down on some of the resulting noise.
This commit is contained in:
Mark Travis
2023-03-14 20:49:40 -07:00
committed by GitHub
parent 1e7710eee2
commit f7b3ddd87b
8 changed files with 57 additions and 22 deletions

View File

@@ -266,7 +266,7 @@ Ledger::Ledger(
{
info_.hash = calculateLedgerHash(info_);
if (acquire && !config.reporting())
family.missingNode(info_.hash, info_.seq);
family.missingNodeAcquireByHash(info_.hash, info_.seq);
}
}

View File

@@ -75,11 +75,23 @@ public:
virtual bool
isShardBacked() const = 0;
/** Acquire ledger that has a missing node by ledger sequence
*
* Throw if in reporting mode.
*
* @param refNum Sequence of ledger to acquire.
* @param nodeHash Hash of missing node to report in throw.
*/
virtual void
missingNode(std::uint32_t refNum) = 0;
missingNodeAcquireBySeq(std::uint32_t refNum, uint256 const& nodeHash) = 0;
/** Acquire ledger that has a missing node by ledger hash
*
* @param refHash Hash of ledger to acquire.
* @param refNum Ledger sequence with missing node.
*/
virtual void
missingNode(uint256 const& refHash, std::uint32_t refNum) = 0;
missingNodeAcquireByHash(uint256 const& refHash, std::uint32_t refNum) = 0;
virtual void
reset() = 0;

View File

@@ -83,10 +83,10 @@ public:
reset() override;
void
missingNode(std::uint32_t seq) override;
missingNodeAcquireBySeq(std::uint32_t seq, uint256 const& hash) override;
void
missingNode(uint256 const& hash, std::uint32_t seq) override
missingNodeAcquireByHash(uint256 const& hash, std::uint32_t seq) override
{
acquire(hash, seq);
}

View File

@@ -89,10 +89,11 @@ public:
reset() override;
void
missingNode(std::uint32_t seq) override;
missingNodeAcquireBySeq(std::uint32_t seq, uint256 const& nodeHash)
override;
void
missingNode(uint256 const& hash, std::uint32_t seq) override
missingNodeAcquireByHash(uint256 const& hash, std::uint32_t seq) override
{
acquire(hash, seq);
}

View File

@@ -21,6 +21,7 @@
#include <ripple/app/main/Application.h>
#include <ripple/app/main/Tuning.h>
#include <ripple/shamap/NodeFamily.h>
#include <sstream>
namespace ripple {
@@ -65,9 +66,16 @@ NodeFamily::reset()
}
void
NodeFamily::missingNode(std::uint32_t seq)
NodeFamily::missingNodeAcquireBySeq(std::uint32_t seq, uint256 const& nodeHash)
{
JLOG(j_.error()) << "Missing node in " << seq;
if (app_.config().reporting())
{
std::stringstream ss;
ss << "Node not read, likely a Cassandra error in ledger seq " << seq
<< " object hash " << nodeHash;
Throw<std::runtime_error>(ss.str());
}
std::unique_lock<std::mutex> lock(maxSeqMutex_);
if (maxSeq_ == 0)

View File

@@ -173,30 +173,40 @@ SHAMap::finishFetch(
std::shared_ptr<NodeObject> const& object) const
{
assert(backed_);
if (!object)
{
if (full_)
{
full_ = false;
f_.missingNode(ledgerSeq_);
}
return {};
}
std::shared_ptr<SHAMapTreeNode> node;
try
{
if (!object)
{
if (full_)
{
full_ = false;
f_.missingNodeAcquireBySeq(ledgerSeq_, hash.as_uint256());
}
return {};
}
node =
SHAMapTreeNode::makeFromPrefix(makeSlice(object->getData()), hash);
if (node)
canonicalize(hash, node);
return node;
}
catch (std::exception const&)
catch (SHAMapMissingNode const& e)
{
JLOG(journal_.warn()) << "Missing node: " << hash << " : " << e.what();
}
catch (std::runtime_error const& e)
{
JLOG(journal_.warn()) << e.what();
}
catch (...)
{
JLOG(journal_.warn()) << "Invalid DB node " << hash;
return std::shared_ptr<SHAMapTreeNode>();
}
return std::shared_ptr<SHAMapTreeNode>();
}
// See if a sync filter has a node

View File

@@ -22,6 +22,7 @@
#include <ripple/app/main/Tuning.h>
#include <ripple/nodestore/DatabaseShard.h>
#include <ripple/shamap/ShardFamily.h>
#include <tuple>
namespace ripple {
@@ -152,8 +153,9 @@ ShardFamily::reset()
}
void
ShardFamily::missingNode(std::uint32_t seq)
ShardFamily::missingNodeAcquireBySeq(std::uint32_t seq, uint256 const& nodeHash)
{
std::ignore = nodeHash;
JLOG(j_.error()) << "Missing node in ledger sequence " << seq;
std::unique_lock<std::mutex> lock(maxSeqMutex_);

View File

@@ -105,13 +105,15 @@ public:
}
void
missingNode(std::uint32_t refNum) override
missingNodeAcquireBySeq(std::uint32_t refNum, uint256 const& nodeHash)
override
{
Throw<std::runtime_error>("missing node");
}
void
missingNode(uint256 const& refHash, std::uint32_t refNum) override
missingNodeAcquireByHash(uint256 const& refHash, std::uint32_t refNum)
override
{
Throw<std::runtime_error>("missing node");
}