General purpose function to retry on database timeout

This commit is contained in:
CJ Cobb
2022-01-25 20:10:02 +00:00
parent da96608feb
commit bc131f666a
7 changed files with 117 additions and 136 deletions

View File

@@ -27,17 +27,7 @@ std::optional<LedgerRange>
BackendInterface::hardFetchLedgerRangeNoThrow() const
{
BOOST_LOG_TRIVIAL(debug) << __func__;
while (true)
{
try
{
return hardFetchLedgerRange();
}
catch (DatabaseTimeout& t)
{
;
}
}
return retryOnTimeout([&]() { return hardFetchLedgerRange(); });
}
// *** state data methods
std::optional<Blob>

View File

@@ -5,6 +5,8 @@
#include <backend/DBHelpers.h>
#include <backend/SimpleCache.h>
#include <backend/Types.h>
#include <thread>
#include <type_traits>
namespace Backend {
class DatabaseTimeout : public std::exception
@@ -16,6 +18,25 @@ class DatabaseTimeout : public std::exception
}
};
template <class F>
auto
retryOnTimeout(F func, size_t waitMs = 500)
{
while (true)
{
try
{
return func();
}
catch (DatabaseTimeout& t)
{
std::this_thread::sleep_for(std::chrono::milliseconds(waitMs));
BOOST_LOG_TRIVIAL(error)
<< __func__ << " function timed out. Retrying ... ";
}
}
}
class BackendInterface
{
protected:

View File

@@ -690,10 +690,8 @@ CassandraBackend::doOnlineDelete(uint32_t numLedgersToKeep) const
std::optional<ripple::uint256> cursor;
while (true)
{
try
{
auto [objects, curCursor, warning] =
fetchLedgerPage(cursor, minLedger, 256);
auto [objects, curCursor, warning] = retryOnTimeout(
[&]() { return fetchLedgerPage(cursor, minLedger, 256); });
if (warning)
{
BOOST_LOG_TRIVIAL(warning)
@@ -726,13 +724,6 @@ CassandraBackend::doOnlineDelete(uint32_t numLedgersToKeep) const
if (!cursor)
break;
}
catch (DatabaseTimeout const& e)
{
BOOST_LOG_TRIVIAL(warning)
<< __func__ << " Database timeout fetching keys";
std::this_thread::sleep_for(std::chrono::seconds(2));
}
}
std::unique_lock<std::mutex> lck(mtx);
cv.wait(lck, [&numOutstanding]() { return numOutstanding == 0; });
CassandraStatement statement{deleteLedgerRange_};

View File

@@ -720,10 +720,8 @@ PostgresBackend::doOnlineDelete(uint32_t numLedgersToKeep) const
std::optional<ripple::uint256> cursor;
while (true)
{
try
{
auto [objects, curCursor, warning] =
fetchLedgerPage(cursor, minLedger, 256);
auto [objects, curCursor, warning] = retryOnTimeout(
[&]() { return fetchLedgerPage(cursor, minLedger, 256); });
if (warning)
{
BOOST_LOG_TRIVIAL(warning) << __func__
@@ -746,13 +744,6 @@ PostgresBackend::doOnlineDelete(uint32_t numLedgersToKeep) const
if (!cursor)
break;
}
catch (DatabaseTimeout const& e)
{
BOOST_LOG_TRIVIAL(warning)
<< __func__ << " Database timeout fetching keys";
std::this_thread::sleep_for(std::chrono::seconds(2));
}
}
BOOST_LOG_TRIVIAL(info) << __func__ << " finished inserting into objects";
{
std::stringstream sql;

View File

@@ -131,37 +131,23 @@ ReportingETL::publishLedger(ripple::LedgerInfo const& lgrInfo)
{
BOOST_LOG_TRIVIAL(debug)
<< __func__ << " - Publishing ledger " << std::to_string(lgrInfo.seq);
if (!writing_)
{
BOOST_LOG_TRIVIAL(debug) << __func__ << " - Updating cache";
auto diff = backend_->fetchLedgerDiff(lgrInfo.seq);
auto diff = Backend::retryOnTimeout(
[&]() { return backend_->fetchLedgerDiff(lgrInfo.seq); });
backend_->cache().update(diff, lgrInfo.seq);
}
backend_->updateRange(lgrInfo.seq);
auto fees = Backend::retryOnTimeout(
[&]() { return backend_->fetchFees(lgrInfo.seq); });
auto transactions = Backend::retryOnTimeout(
[&]() { return backend_->fetchAllTransactionsInLedger(lgrInfo.seq); });
auto ledgerRange = backend_->fetchLedgerRange();
std::optional<ripple::Fees> fees;
std::vector<Backend::TransactionAndMetadata> transactions;
while (true)
{
try
{
fees = backend_->fetchFees(lgrInfo.seq);
transactions = backend_->fetchAllTransactionsInLedger(lgrInfo.seq);
break;
}
catch (Backend::DatabaseTimeout const&)
{
BOOST_LOG_TRIVIAL(warning) << "Read timeout fetching transactions";
}
}
if (!fees || !ledgerRange)
{
BOOST_LOG_TRIVIAL(error)
<< __func__ << " - could not fetch from database";
return;
}
assert(ledgerRange);
assert(fees);
std::string range = std::to_string(ledgerRange->minSequence) + "-" +
std::to_string(ledgerRange->maxSequence);
@@ -172,7 +158,7 @@ ReportingETL::publishLedger(ripple::LedgerInfo const& lgrInfo)
subscriptions_->pubTransaction(txAndMeta, lgrInfo);
setLastPublish();
BOOST_LOG_TRIVIAL(debug)
BOOST_LOG_TRIVIAL(info)
<< __func__ << " - Published ledger " << std::to_string(lgrInfo.seq);
}
@@ -186,8 +172,6 @@ ReportingETL::publishLedger(
<< "Attempting to publish ledger = " << ledgerSequence;
size_t numAttempts = 0;
while (!stopping_)
{
try
{
auto range = backend_->hardFetchLedgerRangeNoThrow();
@@ -201,10 +185,9 @@ ReportingETL::publishLedger(
// second in between each attempt.
if (maxAttempts && numAttempts >= maxAttempts)
{
BOOST_LOG_TRIVIAL(debug)
<< __func__ << " : "
<< "Failed to publish ledger after " << numAttempts
<< " attempts.";
BOOST_LOG_TRIVIAL(debug) << __func__ << " : "
<< "Failed to publish ledger after "
<< numAttempts << " attempts.";
return false;
}
std::this_thread::sleep_for(std::chrono::seconds(1));
@@ -213,18 +196,15 @@ ReportingETL::publishLedger(
}
else
{
auto lgr = backend_->fetchLedgerBySequence(ledgerSequence);
auto lgr = Backend::retryOnTimeout([&]() {
return backend_->fetchLedgerBySequence(ledgerSequence);
});
assert(lgr);
publishLedger(*lgr);
return true;
}
}
catch (Backend::DatabaseTimeout const& e)
{
continue;
}
}
return false;
}
@@ -678,9 +658,12 @@ ReportingETL::runETLPipeline(uint32_t startSequence, int numExtractors)
// success is false if the ledger was already written
if (success)
{
/*
boost::asio::post(publishStrand_, [this, lgrInfo = lgrInfo]() {
publishLedger(lgrInfo);
});
*/
backend_->updateRange(lgrInfo.seq);
lastPublishedSequence = lgrInfo.seq;
}
writeConflict = !success;

View File

@@ -232,8 +232,11 @@ SubscriptionManager::pubTransaction(
auto amount = tx->getFieldAmount(ripple::sfTakerGets);
if (account != amount.issue().account)
{
auto ownerFunds =
RPC::accountFunds(*backend_, lgrInfo.seq, amount, account);
auto ownerFunds = Backend::retryOnTimeout([&]() {
return RPC::accountFunds(
*backend_, lgrInfo.seq, amount, account);
});
pubObj["transaction"].as_object()["owner_funds"] =
ownerFunds.getText();
}

View File

@@ -282,6 +282,8 @@ public:
catch (Backend::DatabaseTimeout const& t)
{
BOOST_LOG_TRIVIAL(error) << __func__ << " Database timeout";
// TODO this should be a different error code. Rippled probably
// does not have an analogous error code
return sendError(RPC::Error::rpcNOT_READY);
}
}