General purpose function to retry on database timeout

This commit is contained in:
CJ Cobb
2022-01-25 20:10:02 +00:00
parent da96608feb
commit bc131f666a
7 changed files with 117 additions and 136 deletions

View File

@@ -27,17 +27,7 @@ std::optional<LedgerRange>
BackendInterface::hardFetchLedgerRangeNoThrow() const BackendInterface::hardFetchLedgerRangeNoThrow() const
{ {
BOOST_LOG_TRIVIAL(debug) << __func__; BOOST_LOG_TRIVIAL(debug) << __func__;
while (true) return retryOnTimeout([&]() { return hardFetchLedgerRange(); });
{
try
{
return hardFetchLedgerRange();
}
catch (DatabaseTimeout& t)
{
;
}
}
} }
// *** state data methods // *** state data methods
std::optional<Blob> std::optional<Blob>

View File

@@ -5,6 +5,8 @@
#include <backend/DBHelpers.h> #include <backend/DBHelpers.h>
#include <backend/SimpleCache.h> #include <backend/SimpleCache.h>
#include <backend/Types.h> #include <backend/Types.h>
#include <thread>
#include <type_traits>
namespace Backend { namespace Backend {
class DatabaseTimeout : public std::exception class DatabaseTimeout : public std::exception
@@ -16,6 +18,25 @@ class DatabaseTimeout : public std::exception
} }
}; };
template <class F>
auto
retryOnTimeout(F func, size_t waitMs = 500)
{
while (true)
{
try
{
return func();
}
catch (DatabaseTimeout& t)
{
std::this_thread::sleep_for(std::chrono::milliseconds(waitMs));
BOOST_LOG_TRIVIAL(error)
<< __func__ << " function timed out. Retrying ... ";
}
}
}
class BackendInterface class BackendInterface
{ {
protected: protected:

View File

@@ -690,10 +690,8 @@ CassandraBackend::doOnlineDelete(uint32_t numLedgersToKeep) const
std::optional<ripple::uint256> cursor; std::optional<ripple::uint256> cursor;
while (true) while (true)
{ {
try auto [objects, curCursor, warning] = retryOnTimeout(
{ [&]() { return fetchLedgerPage(cursor, minLedger, 256); });
auto [objects, curCursor, warning] =
fetchLedgerPage(cursor, minLedger, 256);
if (warning) if (warning)
{ {
BOOST_LOG_TRIVIAL(warning) BOOST_LOG_TRIVIAL(warning)
@@ -726,13 +724,6 @@ CassandraBackend::doOnlineDelete(uint32_t numLedgersToKeep) const
if (!cursor) if (!cursor)
break; break;
} }
catch (DatabaseTimeout const& e)
{
BOOST_LOG_TRIVIAL(warning)
<< __func__ << " Database timeout fetching keys";
std::this_thread::sleep_for(std::chrono::seconds(2));
}
}
std::unique_lock<std::mutex> lck(mtx); std::unique_lock<std::mutex> lck(mtx);
cv.wait(lck, [&numOutstanding]() { return numOutstanding == 0; }); cv.wait(lck, [&numOutstanding]() { return numOutstanding == 0; });
CassandraStatement statement{deleteLedgerRange_}; CassandraStatement statement{deleteLedgerRange_};

View File

@@ -720,10 +720,8 @@ PostgresBackend::doOnlineDelete(uint32_t numLedgersToKeep) const
std::optional<ripple::uint256> cursor; std::optional<ripple::uint256> cursor;
while (true) while (true)
{ {
try auto [objects, curCursor, warning] = retryOnTimeout(
{ [&]() { return fetchLedgerPage(cursor, minLedger, 256); });
auto [objects, curCursor, warning] =
fetchLedgerPage(cursor, minLedger, 256);
if (warning) if (warning)
{ {
BOOST_LOG_TRIVIAL(warning) << __func__ BOOST_LOG_TRIVIAL(warning) << __func__
@@ -746,13 +744,6 @@ PostgresBackend::doOnlineDelete(uint32_t numLedgersToKeep) const
if (!cursor) if (!cursor)
break; break;
} }
catch (DatabaseTimeout const& e)
{
BOOST_LOG_TRIVIAL(warning)
<< __func__ << " Database timeout fetching keys";
std::this_thread::sleep_for(std::chrono::seconds(2));
}
}
BOOST_LOG_TRIVIAL(info) << __func__ << " finished inserting into objects"; BOOST_LOG_TRIVIAL(info) << __func__ << " finished inserting into objects";
{ {
std::stringstream sql; std::stringstream sql;

View File

@@ -131,37 +131,23 @@ ReportingETL::publishLedger(ripple::LedgerInfo const& lgrInfo)
{ {
BOOST_LOG_TRIVIAL(debug) BOOST_LOG_TRIVIAL(debug)
<< __func__ << " - Publishing ledger " << std::to_string(lgrInfo.seq); << __func__ << " - Publishing ledger " << std::to_string(lgrInfo.seq);
if (!writing_) if (!writing_)
{ {
BOOST_LOG_TRIVIAL(debug) << __func__ << " - Updating cache"; BOOST_LOG_TRIVIAL(debug) << __func__ << " - Updating cache";
auto diff = backend_->fetchLedgerDiff(lgrInfo.seq); auto diff = Backend::retryOnTimeout(
[&]() { return backend_->fetchLedgerDiff(lgrInfo.seq); });
backend_->cache().update(diff, lgrInfo.seq); backend_->cache().update(diff, lgrInfo.seq);
} }
backend_->updateRange(lgrInfo.seq); backend_->updateRange(lgrInfo.seq);
auto fees = Backend::retryOnTimeout(
[&]() { return backend_->fetchFees(lgrInfo.seq); });
auto transactions = Backend::retryOnTimeout(
[&]() { return backend_->fetchAllTransactionsInLedger(lgrInfo.seq); });
auto ledgerRange = backend_->fetchLedgerRange(); auto ledgerRange = backend_->fetchLedgerRange();
assert(ledgerRange);
std::optional<ripple::Fees> fees; assert(fees);
std::vector<Backend::TransactionAndMetadata> transactions;
while (true)
{
try
{
fees = backend_->fetchFees(lgrInfo.seq);
transactions = backend_->fetchAllTransactionsInLedger(lgrInfo.seq);
break;
}
catch (Backend::DatabaseTimeout const&)
{
BOOST_LOG_TRIVIAL(warning) << "Read timeout fetching transactions";
}
}
if (!fees || !ledgerRange)
{
BOOST_LOG_TRIVIAL(error)
<< __func__ << " - could not fetch from database";
return;
}
std::string range = std::to_string(ledgerRange->minSequence) + "-" + std::string range = std::to_string(ledgerRange->minSequence) + "-" +
std::to_string(ledgerRange->maxSequence); std::to_string(ledgerRange->maxSequence);
@@ -172,7 +158,7 @@ ReportingETL::publishLedger(ripple::LedgerInfo const& lgrInfo)
subscriptions_->pubTransaction(txAndMeta, lgrInfo); subscriptions_->pubTransaction(txAndMeta, lgrInfo);
setLastPublish(); setLastPublish();
BOOST_LOG_TRIVIAL(debug) BOOST_LOG_TRIVIAL(info)
<< __func__ << " - Published ledger " << std::to_string(lgrInfo.seq); << __func__ << " - Published ledger " << std::to_string(lgrInfo.seq);
} }
@@ -186,8 +172,6 @@ ReportingETL::publishLedger(
<< "Attempting to publish ledger = " << ledgerSequence; << "Attempting to publish ledger = " << ledgerSequence;
size_t numAttempts = 0; size_t numAttempts = 0;
while (!stopping_) while (!stopping_)
{
try
{ {
auto range = backend_->hardFetchLedgerRangeNoThrow(); auto range = backend_->hardFetchLedgerRangeNoThrow();
@@ -201,10 +185,9 @@ ReportingETL::publishLedger(
// second in between each attempt. // second in between each attempt.
if (maxAttempts && numAttempts >= maxAttempts) if (maxAttempts && numAttempts >= maxAttempts)
{ {
BOOST_LOG_TRIVIAL(debug) BOOST_LOG_TRIVIAL(debug) << __func__ << " : "
<< __func__ << " : " << "Failed to publish ledger after "
<< "Failed to publish ledger after " << numAttempts << numAttempts << " attempts.";
<< " attempts.";
return false; return false;
} }
std::this_thread::sleep_for(std::chrono::seconds(1)); std::this_thread::sleep_for(std::chrono::seconds(1));
@@ -213,18 +196,15 @@ ReportingETL::publishLedger(
} }
else else
{ {
auto lgr = backend_->fetchLedgerBySequence(ledgerSequence); auto lgr = Backend::retryOnTimeout([&]() {
return backend_->fetchLedgerBySequence(ledgerSequence);
});
assert(lgr); assert(lgr);
publishLedger(*lgr); publishLedger(*lgr);
return true; return true;
} }
} }
catch (Backend::DatabaseTimeout const& e)
{
continue;
}
}
return false; return false;
} }
@@ -678,9 +658,12 @@ ReportingETL::runETLPipeline(uint32_t startSequence, int numExtractors)
// success is false if the ledger was already written // success is false if the ledger was already written
if (success) if (success)
{ {
/*
boost::asio::post(publishStrand_, [this, lgrInfo = lgrInfo]() { boost::asio::post(publishStrand_, [this, lgrInfo = lgrInfo]() {
publishLedger(lgrInfo); publishLedger(lgrInfo);
}); });
*/
backend_->updateRange(lgrInfo.seq);
lastPublishedSequence = lgrInfo.seq; lastPublishedSequence = lgrInfo.seq;
} }
writeConflict = !success; writeConflict = !success;

View File

@@ -232,8 +232,11 @@ SubscriptionManager::pubTransaction(
auto amount = tx->getFieldAmount(ripple::sfTakerGets); auto amount = tx->getFieldAmount(ripple::sfTakerGets);
if (account != amount.issue().account) if (account != amount.issue().account)
{ {
auto ownerFunds = auto ownerFunds = Backend::retryOnTimeout([&]() {
RPC::accountFunds(*backend_, lgrInfo.seq, amount, account); return RPC::accountFunds(
*backend_, lgrInfo.seq, amount, account);
});
pubObj["transaction"].as_object()["owner_funds"] = pubObj["transaction"].as_object()["owner_funds"] =
ownerFunds.getText(); ownerFunds.getText();
} }

View File

@@ -282,6 +282,8 @@ public:
catch (Backend::DatabaseTimeout const& t) catch (Backend::DatabaseTimeout const& t)
{ {
BOOST_LOG_TRIVIAL(error) << __func__ << " Database timeout"; BOOST_LOG_TRIVIAL(error) << __func__ << " Database timeout";
// TODO this should be a different error code. Rippled probably
// does not have an analogous error code
return sendError(RPC::Error::rpcNOT_READY); return sendError(RPC::Error::rpcNOT_READY);
} }
} }