Postgres fixes (#84)

* Postgres fixes

* Create partial index for ledger_diffs, to avoid indexing the first
  ledger's objects

* Don't write duplicate keys to successor table

* Default to 4 markers when syncing cache

* Remove isFirst from writeLedger interface
This commit is contained in:
CJ Cobb
2022-01-07 14:48:48 -05:00
committed by GitHub
parent 628ede5bdb
commit 6e6f47421d
10 changed files with 84 additions and 65 deletions

View File

@@ -8,16 +8,8 @@ BackendInterface::finishWrites(uint32_t ledgerSequence)
auto commitRes = doFinishWrites(); auto commitRes = doFinishWrites();
if (commitRes) if (commitRes)
{ {
isFirst_ = false;
updateRange(ledgerSequence); updateRange(ledgerSequence);
} }
else
{
// if commitRes is false, we are relinquishing control of ETL. We
// reset isFirst_ to true so that way if we later regain control of
// ETL, we trigger the index repair
isFirst_ = true;
}
return commitRes; return commitRes;
} }
void void
@@ -242,8 +234,9 @@ BackendInterface::fetchLedgerPage(
std::vector<ripple::uint256> keys; std::vector<ripple::uint256> keys;
while (keys.size() < limit) while (keys.size() < limit)
{ {
ripple::uint256 const& curCursor = ripple::uint256 const& curCursor = keys.size() ? keys.back()
keys.size() ? keys.back() : cursor ? *cursor : firstKey; : cursor ? *cursor
: firstKey;
auto succ = fetchSuccessorKey(curCursor, ledgerSequence); auto succ = fetchSuccessorKey(curCursor, ledgerSequence);
if (!succ) if (!succ)
break; break;

View File

@@ -19,7 +19,6 @@ class DatabaseTimeout : public std::exception
class BackendInterface class BackendInterface
{ {
protected: protected:
bool isFirst_ = true;
std::optional<LedgerRange> range; std::optional<LedgerRange> range;
SimpleCache cache_; SimpleCache cache_;
@@ -153,8 +152,7 @@ public:
virtual void virtual void
writeLedger( writeLedger(
ripple::LedgerInfo const& ledgerInfo, ripple::LedgerInfo const& ledgerInfo,
std::string&& ledgerHeader, std::string&& ledgerHeader) = 0;
bool isFirst = false) = 0;
void void
writeLedgerObject(std::string&& key, uint32_t seq, std::string&& blob); writeLedgerObject(std::string&& key, uint32_t seq, std::string&& blob);

View File

@@ -151,7 +151,7 @@ CassandraBackend::doWriteLedgerObject(
std::string&& blob) std::string&& blob)
{ {
BOOST_LOG_TRIVIAL(trace) << "Writing ledger object to cassandra"; BOOST_LOG_TRIVIAL(trace) << "Writing ledger object to cassandra";
if (!isFirst_) if (range)
makeAndExecuteAsyncWrite( makeAndExecuteAsyncWrite(
this, std::move(std::make_tuple(seq, key)), [this](auto& params) { this, std::move(std::make_tuple(seq, key)), [this](auto& params) {
auto& [sequence, key] = params.data; auto& [sequence, key] = params.data;
@@ -201,8 +201,7 @@ CassandraBackend::writeSuccessor(
void void
CassandraBackend::writeLedger( CassandraBackend::writeLedger(
ripple::LedgerInfo const& ledgerInfo, ripple::LedgerInfo const& ledgerInfo,
std::string&& header, std::string&& header)
bool isFirst)
{ {
makeAndExecuteAsyncWrite( makeAndExecuteAsyncWrite(
this, this,
@@ -225,7 +224,6 @@ CassandraBackend::writeLedger(
return statement; return statement;
}); });
ledgerSequence_ = ledgerInfo.seq; ledgerSequence_ = ledgerInfo.seq;
isFirstLedger_ = isFirst;
} }
void void
CassandraBackend::writeAccountTransactions( CassandraBackend::writeAccountTransactions(
@@ -462,10 +460,9 @@ CassandraBackend::fetchAccountTransactions(
{ {
statement.bindNextIntTuple( statement.bindNextIntTuple(
cursor->ledgerSequence, cursor->transactionIndex); cursor->ledgerSequence, cursor->transactionIndex);
BOOST_LOG_TRIVIAL(debug) BOOST_LOG_TRIVIAL(debug) << " account = " << ripple::strHex(account)
<< " account = " << ripple::strHex(account) << " tuple = " << cursor->ledgerSequence
<< " tuple = " << cursor->ledgerSequence << " : " << " : " << cursor->transactionIndex;
<< cursor->transactionIndex;
} }
else else
{ {
@@ -474,8 +471,8 @@ CassandraBackend::fetchAccountTransactions(
statement.bindNextIntTuple(placeHolder, placeHolder); statement.bindNextIntTuple(placeHolder, placeHolder);
BOOST_LOG_TRIVIAL(debug) BOOST_LOG_TRIVIAL(debug)
<< " account = " << ripple::strHex(account) << " account = " << ripple::strHex(account) << " idx = " << seq
<< " idx = " << seq << " tuple = " << placeHolder; << " tuple = " << placeHolder;
} }
statement.bindNextUInt(limit); statement.bindNextUInt(limit);

View File

@@ -597,7 +597,6 @@ private:
boost::json::object config_; boost::json::object config_;
mutable uint32_t ledgerSequence_ = 0; mutable uint32_t ledgerSequence_ = 0;
mutable bool isFirstLedger_ = false;
public: public:
CassandraBackend(boost::json::object const& config) CassandraBackend(boost::json::object const& config)
@@ -648,7 +647,7 @@ public:
// wait for all other writes to finish // wait for all other writes to finish
sync(); sync();
// write range // write range
if (isFirstLedger_) if (!range)
{ {
CassandraStatement statement{updateLedgerRange_}; CassandraStatement statement{updateLedgerRange_};
statement.bindNextInt(ledgerSequence_); statement.bindNextInt(ledgerSequence_);
@@ -672,10 +671,8 @@ public:
return true; return true;
} }
void void
writeLedger( writeLedger(ripple::LedgerInfo const& ledgerInfo, std::string&& header)
ripple::LedgerInfo const& ledgerInfo, override;
std::string&& header,
bool isFirst = false) override;
std::optional<uint32_t> std::optional<uint32_t>
fetchLatestLedgerSequence() const override fetchLatestLedgerSequence() const override

View File

@@ -773,8 +773,6 @@ CREATE TABLE IF NOT EXISTS objects (
CREATE INDEX objects_idx ON objects USING btree(key,ledger_seq); CREATE INDEX objects_idx ON objects USING btree(key,ledger_seq);
CREATE INDEX diff ON objects USING hash(ledger_seq);
create table if not exists objects1 partition of objects for values from (0) to (10000000); create table if not exists objects1 partition of objects for values from (0) to (10000000);
create table if not exists objects2 partition of objects for values from (10000000) to (20000000); create table if not exists objects2 partition of objects for values from (10000000) to (20000000);
create table if not exists objects3 partition of objects for values from (20000000) to (30000000); create table if not exists objects3 partition of objects for values from (20000000) to (30000000);

View File

@@ -17,8 +17,7 @@ PostgresBackend::PostgresBackend(boost::json::object const& config)
void void
PostgresBackend::writeLedger( PostgresBackend::writeLedger(
ripple::LedgerInfo const& ledgerInfo, ripple::LedgerInfo const& ledgerInfo,
std::string&& ledgerHeader, std::string&& ledgerHeader)
bool isFirst)
{ {
auto cmd = boost::format( auto cmd = boost::format(
R"(INSERT INTO ledgers R"(INSERT INTO ledgers
@@ -35,6 +34,7 @@ PostgresBackend::writeLedger(
auto res = writeConnection_(ledgerInsert.data()); auto res = writeConnection_(ledgerInsert.data());
abortWrite_ = !res; abortWrite_ = !res;
inProcessLedger = ledgerInfo.seq;
} }
void void
@@ -88,9 +88,17 @@ PostgresBackend::writeSuccessor(
uint32_t seq, uint32_t seq,
std::string&& successor) std::string&& successor)
{ {
if (range)
{
if (successors_.count(key) > 0)
return;
successors_.insert(key);
}
successorBuffer_ << "\\\\x" << ripple::strHex(key) << '\t' successorBuffer_ << "\\\\x" << ripple::strHex(key) << '\t'
<< std::to_string(seq) << '\t' << "\\\\x" << std::to_string(seq) << '\t' << "\\\\x"
<< ripple::strHex(successor) << '\n'; << ripple::strHex(successor) << '\n';
BOOST_LOG_TRIVIAL(trace)
<< __func__ << ripple::strHex(key) << " - " << std::to_string(seq);
numRowsInSuccessorBuffer_++; numRowsInSuccessorBuffer_++;
if (numRowsInSuccessorBuffer_ % writeInterval_ == 0) if (numRowsInSuccessorBuffer_ % writeInterval_ == 0)
{ {
@@ -532,7 +540,7 @@ PostgresBackend::fetchLedgerDiff(uint32_t ledgerSequence) const
"WHERE " "WHERE "
<< "ledger_seq = " << std::to_string(ledgerSequence); << "ledger_seq = " << std::to_string(ledgerSequence);
auto res = pgQuery(sql.str().data()); auto res = pgQuery(sql.str().data());
if (size_t numRows = checkResult(res, 4)) if (size_t numRows = checkResult(res, 2))
{ {
std::vector<LedgerObject> objects; std::vector<LedgerObject> objects;
for (size_t i = 0; i < numRows; ++i) for (size_t i = 0; i < numRows; ++i)
@@ -666,6 +674,17 @@ PostgresBackend::doFinishWrites()
std::string successorStr = successorBuffer_.str(); std::string successorStr = successorBuffer_.str();
if (successorStr.size()) if (successorStr.size())
writeConnection_.bulkInsert("successor", successorStr); writeConnection_.bulkInsert("successor", successorStr);
successors_.clear();
if (!range)
{
std::stringstream indexCreate;
indexCreate
<< "CREATE INDEX diff ON objects USING hash(ledger_seq) "
"WHERE NOT "
"ledger_seq = "
<< std::to_string(inProcessLedger);
writeConnection_(indexCreate.str().data());
}
} }
auto res = writeConnection_("COMMIT"); auto res = writeConnection_("COMMIT");
if (!res || res.status() != PGRES_COMMAND_OK) if (!res || res.status() != PGRES_COMMAND_OK)

View File

@@ -11,7 +11,6 @@ private:
mutable std::stringstream objectsBuffer_; mutable std::stringstream objectsBuffer_;
mutable size_t numRowsInSuccessorBuffer_ = 0; mutable size_t numRowsInSuccessorBuffer_ = 0;
mutable std::stringstream successorBuffer_; mutable std::stringstream successorBuffer_;
mutable std::stringstream keysBuffer_;
mutable std::stringstream transactionsBuffer_; mutable std::stringstream transactionsBuffer_;
mutable std::stringstream accountTxBuffer_; mutable std::stringstream accountTxBuffer_;
std::shared_ptr<PgPool> pgPool_; std::shared_ptr<PgPool> pgPool_;
@@ -19,6 +18,8 @@ private:
mutable bool abortWrite_ = false; mutable bool abortWrite_ = false;
mutable boost::asio::thread_pool pool_{16}; mutable boost::asio::thread_pool pool_{16};
uint32_t writeInterval_ = 1000000; uint32_t writeInterval_ = 1000000;
uint32_t inProcessLedger = 0;
std::unordered_set<std::string> successors_;
public: public:
PostgresBackend(boost::json::object const& config); PostgresBackend(boost::json::object const& config);
@@ -75,8 +76,7 @@ public:
void void
writeLedger( writeLedger(
ripple::LedgerInfo const& ledgerInfo, ripple::LedgerInfo const& ledgerInfo,
std::string&& ledgerHeader, std::string&& ledgerHeader) override;
bool isFirst) override;
void void
doWriteLedgerObject(std::string&& key, uint32_t seq, std::string&& blob) doWriteLedgerObject(std::string&& key, uint32_t seq, std::string&& blob)

View File

@@ -599,6 +599,11 @@ public:
for (int i = 0; i < cur_->ledger_objects().objects_size(); ++i) for (int i = 0; i < cur_->ledger_objects().objects_size(); ++i)
{ {
auto& obj = *(cur_->mutable_ledger_objects()->mutable_objects(i)); auto& obj = *(cur_->mutable_ledger_objects()->mutable_objects(i));
if (!more)
{
if (((unsigned char)obj.key()[0]) >= nextPrefix_)
continue;
}
cacheUpdates.push_back( cacheUpdates.push_back(
{*ripple::uint256::fromVoidChecked(obj.key()), {*ripple::uint256::fromVoidChecked(obj.key()),
{obj.mutable_data()->begin(), obj.mutable_data()->end()}}); {obj.mutable_data()->begin(), obj.mutable_data()->end()}});
@@ -743,6 +748,9 @@ ETLSourceImpl<Derived>::loadInitialLedger(
auto start = std::chrono::system_clock::now(); auto start = std::chrono::system_clock::now();
for (auto& key : edgeKeys) for (auto& key : edgeKeys)
{ {
BOOST_LOG_TRIVIAL(debug)
<< __func__
<< " writing edge key = " << ripple::strHex(key);
auto succ = backend_->cache().getSuccessor( auto succ = backend_->cache().getSuccessor(
*ripple::uint256::fromVoidChecked(key), sequence); *ripple::uint256::fromVoidChecked(key), sequence);
if (succ) if (succ)
@@ -761,18 +769,23 @@ ETLSourceImpl<Derived>::loadInitialLedger(
if (isBookDir(cur->key, cur->blob)) if (isBookDir(cur->key, cur->blob))
{ {
auto base = getBookBase(cur->key); auto base = getBookBase(cur->key);
auto succ = backend_->cache().getSuccessor(base, sequence); // make sure the base is not an actual object
assert(succ); if (!backend_->cache().get(cur->key, sequence))
if (succ->key == cur->key)
{ {
BOOST_LOG_TRIVIAL(debug) auto succ =
<< __func__ << " Writing book successor = " backend_->cache().getSuccessor(base, sequence);
<< ripple::strHex(base) << " - " assert(succ);
<< ripple::strHex(cur->key); if (succ->key == cur->key)
backend_->writeSuccessor( {
uint256ToString(base), BOOST_LOG_TRIVIAL(debug)
sequence, << __func__ << " Writing book successor = "
uint256ToString(cur->key)); << ripple::strHex(base) << " - "
<< ripple::strHex(cur->key);
backend_->writeSuccessor(
uint256ToString(base),
sequence,
uint256ToString(cur->key));
}
} }
++numWrites; ++numWrites;
} }
@@ -848,6 +861,10 @@ ETLLoadBalancer::ETLLoadBalancer(
downloadRanges_ = std::clamp(downloadRanges_, {1}, {256}); downloadRanges_ = std::clamp(downloadRanges_, {1}, {256});
} }
else if (backend->fetchLedgerRange())
{
downloadRanges_ = 4;
}
for (auto& entry : config.at("etl_sources").as_array()) for (auto& entry : config.at("etl_sources").as_array())
{ {
@@ -923,7 +940,9 @@ ETLLoadBalancer::fetchLedger(
} }
std::optional<boost::json::object> std::optional<boost::json::object>
ETLLoadBalancer::forwardToRippled(boost::json::object const& request, std::string const& clientIp) const ETLLoadBalancer::forwardToRippled(
boost::json::object const& request,
std::string const& clientIp) const
{ {
srand((unsigned)time(0)); srand((unsigned)time(0));
auto sourceIdx = rand() % sources_.size(); auto sourceIdx = rand() % sources_.size();
@@ -942,7 +961,8 @@ ETLLoadBalancer::forwardToRippled(boost::json::object const& request, std::strin
template <class Derived> template <class Derived>
std::optional<boost::json::object> std::optional<boost::json::object>
ETLSourceImpl<Derived>::forwardToRippled( ETLSourceImpl<Derived>::forwardToRippled(
boost::json::object const& request, std::string const& clientIp) const boost::json::object const& request,
std::string const& clientIp) const
{ {
BOOST_LOG_TRIVIAL(debug) << "Attempting to forward request to tx. " BOOST_LOG_TRIVIAL(debug) << "Attempting to forward request to tx. "
<< "request = " << boost::json::serialize(request); << "request = " << boost::json::serialize(request);
@@ -983,17 +1003,14 @@ ETLSourceImpl<Derived>::forwardToRippled(
// //
// https://github.com/ripple/rippled/blob/develop/cfg/rippled-example.cfg // https://github.com/ripple/rippled/blob/develop/cfg/rippled-example.cfg
ws->set_option(websocket::stream_base::decorator( ws->set_option(websocket::stream_base::decorator(
[&request,&clientIp] (websocket::request_type& req) { [&request, &clientIp](websocket::request_type& req) {
req.set( req.set(
http::field::user_agent, http::field::user_agent,
std::string(BOOST_BEAST_VERSION_STRING) + std::string(BOOST_BEAST_VERSION_STRING) +
" websocket-client-coro"); " websocket-client-coro");
req.set( req.set(http::field::forwarded, "for=" + clientIp);
http::field::forwarded,
"for=" + clientIp);
})); }));
BOOST_LOG_TRIVIAL(debug) BOOST_LOG_TRIVIAL(debug) << "client ip: " << clientIp;
<< "client ip: " << clientIp;
BOOST_LOG_TRIVIAL(debug) << "Performing websocket handshake"; BOOST_LOG_TRIVIAL(debug) << "Performing websocket handshake";
// Perform the websocket handshake // Perform the websocket handshake

View File

@@ -11,8 +11,8 @@
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <subscriptions/SubscriptionManager.h> #include <subscriptions/SubscriptionManager.h>
#include <variant>
#include <thread> #include <thread>
#include <variant>
namespace detail { namespace detail {
/// Convenience function for printing out basic ledger info /// Convenience function for printing out basic ledger info
@@ -101,7 +101,7 @@ ReportingETL::loadInitialLedger(uint32_t startingSequence)
backend_->startWrites(); backend_->startWrites();
BOOST_LOG_TRIVIAL(debug) << __func__ << " started writes"; BOOST_LOG_TRIVIAL(debug) << __func__ << " started writes";
backend_->writeLedger( backend_->writeLedger(
lgrInfo, std::move(*ledgerData->mutable_ledger_header()), true); lgrInfo, std::move(*ledgerData->mutable_ledger_header()));
BOOST_LOG_TRIVIAL(debug) << __func__ << " wrote ledger"; BOOST_LOG_TRIVIAL(debug) << __func__ << " wrote ledger";
std::vector<AccountTransactionsData> accountTxData = std::vector<AccountTransactionsData> accountTxData =
insertTransactions(lgrInfo, *ledgerData); insertTransactions(lgrInfo, *ledgerData);
@@ -864,14 +864,14 @@ ReportingETL::monitorReadOnly()
if (!mostRecent) if (!mostRecent)
return; return;
uint32_t sequence = *mostRecent; uint32_t sequence = *mostRecent;
std::thread t{[this, sequence]() {
BOOST_LOG_TRIVIAL(info) << "Loading cache";
loadBalancer_->loadInitialLedger(sequence, true);
}};
t.detach();
while (!stopping_ && while (!stopping_ &&
networkValidatedLedgers_->waitUntilValidatedByNetwork(sequence)) networkValidatedLedgers_->waitUntilValidatedByNetwork(sequence))
{ {
std::thread t{[this, sequence]() {
BOOST_LOG_TRIVIAL(info) << "Loading cache";
loadBalancer_->loadInitialLedger(sequence, true);
}};
t.detach();
publishLedger(sequence, {}); publishLedger(sequence, {});
++sequence; ++sequence;
} }

View File

@@ -85,7 +85,7 @@ TEST(BackendTest, Basic)
deserializeHeader(ripple::makeSlice(rawHeaderBlob)); deserializeHeader(ripple::makeSlice(rawHeaderBlob));
backend->startWrites(); backend->startWrites();
backend->writeLedger(lgrInfo, std::move(rawHeaderBlob), true); backend->writeLedger(lgrInfo, std::move(rawHeaderBlob));
backend->writeSuccessor( backend->writeSuccessor(
uint256ToString(Backend::firstKey), uint256ToString(Backend::firstKey),
lgrInfo.seq, lgrInfo.seq,
@@ -1590,7 +1590,7 @@ TEST(Backend, CacheIntegration)
deserializeHeader(ripple::makeSlice(rawHeaderBlob)); deserializeHeader(ripple::makeSlice(rawHeaderBlob));
backend->startWrites(); backend->startWrites();
backend->writeLedger(lgrInfo, std::move(rawHeaderBlob), true); backend->writeLedger(lgrInfo, std::move(rawHeaderBlob));
backend->writeSuccessor( backend->writeSuccessor(
uint256ToString(Backend::firstKey), uint256ToString(Backend::firstKey),
lgrInfo.seq, lgrInfo.seq,