index during ETL. not tested

This commit is contained in:
CJ Cobb
2021-04-08 20:07:05 +00:00
parent 5f9e5d03f4
commit d9a8ff5399
8 changed files with 289 additions and 54 deletions

View File

@@ -58,6 +58,7 @@ target_sources(reporting PRIVATE
reporting/ETLSource.cpp reporting/ETLSource.cpp
reporting/CassandraBackend.cpp reporting/CassandraBackend.cpp
reporting/PostgresBackend.cpp reporting/PostgresBackend.cpp
reporting/BackendIndexer.cpp
reporting/Pg.cpp reporting/Pg.cpp
reporting/DBHelpers.cpp reporting/DBHelpers.cpp
reporting/ReportingETL.cpp reporting/ReportingETL.cpp

View File

@@ -0,0 +1,76 @@
#include <reporting/BackendInterface.h>
#include <stdexcept>
#include <string>
#include <utility>
namespace Backend {
BackendIndexer::BackendIndexer(boost::json::object const& config)
: keyShift_(config.at("keyshift").as_int64())
, bookShift_(config.at("bookshift").as_int64())
{
work_.emplace(ioc_);
ioThread_ = std::thread{[this]() { ioc_.run(); }};
};
// Shuts the background worker down: drops the work object so ioc_.run() can
// return once its queue is drained, then waits for the thread to exit.
BackendIndexer::~BackendIndexer()
{
    {
        // Hold the mutex only while releasing the work object. Joining with
        // the mutex still held would deadlock if a queued task ever needed it.
        std::unique_lock lck(mutex_);
        work_.reset();
    }
    // Guard the join so a thread that was never started (or already joined)
    // does not make the destructor throw.
    if (ioThread_.joinable())
        ioThread_.join();
}
// Records `key` in the in-memory key set; a key that is already present is
// left as is.
void
BackendIndexer::addKey(ripple::uint256 const& key)
{
    keys.emplace(key);
}
// Drops `key` from the in-memory key set; does nothing when the key is not
// present.
void
BackendIndexer::deleteKey(ripple::uint256 const& key)
{
    auto const pos = keys.find(key);
    if (pos != keys.end())
        keys.erase(pos);
}
// Adds `offerKey` to the set of offers tracked for `book`, creating the
// book's entry on first use.
void
BackendIndexer::addBookOffer(
    ripple::uint256 const& book,
    ripple::uint256 const& offerKey)
{
    auto& offersInBook = booksToOffers[book];
    offersInBook.insert(offerKey);
}
// Moves `offerKey` from the live offers of `book` to the book's deleted
// offers. Both map entries are created if they do not exist yet.
void
BackendIndexer::deleteBookOffer(
    ripple::uint256 const& book,
    ripple::uint256 const& offerKey)
{
    auto& liveOffers = booksToOffers[book];
    liveOffers.erase(offerKey);

    auto& removedOffers = booksToDeletedOffers[book];
    removedOffers.insert(offerKey);
}
void
BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend)
{
if (ledgerSequence >> keyShift_ << keyShift_ == ledgerSequence)
{
std::unordered_set<ripple::uint256> keysCopy = keys;
boost::asio::post(ioc_, [=, &backend]() {
BOOST_LOG_TRIVIAL(info) << "Indexer - writing keys. Ledger = "
<< std::to_string(ledgerSequence);
backend.writeKeys(keysCopy, ledgerSequence);
BOOST_LOG_TRIVIAL(info) << "Indexer - wrote keys. Ledger = "
<< std::to_string(ledgerSequence);
});
}
if (ledgerSequence >> bookShift_ << bookShift_ == ledgerSequence)
{
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>
booksToOffersCopy = booksToOffers;
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>
booksToDeletedOffersCopy = booksToDeletedOffers;
boost::asio::post(ioc_, [=, &backend]() {
BOOST_LOG_TRIVIAL(info) << "Indexer - writing books. Ledger = "
<< std::to_string(ledgerSequence);
backend.writeBooks(booksToOffersCopy, ledgerSequence);
backend.writeBooks(booksToDeletedOffersCopy, ledgerSequence);
BOOST_LOG_TRIVIAL(info) << "Indexer - wrote books. Ledger = "
<< std::to_string(ledgerSequence);
});
booksToDeletedOffers = {};
}
}
} // namespace Backend

View File

@@ -1,7 +1,19 @@
#ifndef RIPPLE_APP_REPORTING_BACKENDINTERFACE_H_INCLUDED #ifndef RIPPLE_APP_REPORTING_BACKENDINTERFACE_H_INCLUDED
#define RIPPLE_APP_REPORTING_BACKENDINTERFACE_H_INCLUDED #define RIPPLE_APP_REPORTING_BACKENDINTERFACE_H_INCLUDED
#include <ripple/ledger/ReadView.h> #include <ripple/ledger/ReadView.h>
#include <boost/asio.hpp>
#include <reporting/DBHelpers.h> #include <reporting/DBHelpers.h>
namespace std {
// Lets ripple::uint256 be used as a key in std::unordered_set and
// std::unordered_map. The hash is whatever boost::hash_range produces over
// the value's [begin(), end()) range.
template <>
struct hash<ripple::uint256>
{
    std::size_t
    operator()(ripple::uint256 const& value) const noexcept
    {
        auto const first = value.begin();
        auto const last = value.end();
        return boost::hash_range(first, last);
    }
};
}  // namespace std
namespace Backend { namespace Backend {
using Blob = std::vector<unsigned char>; using Blob = std::vector<unsigned char>;
struct LedgerObject struct LedgerObject
@@ -42,11 +54,51 @@ class DatabaseTimeout : public std::exception
return "Database read timed out. Please retry the request"; return "Database read timed out. Please retry the request";
} }
}; };
class BackendInterface;
// Accumulates, in memory, the set of ledger object keys and the offers that
// belong to each book while ledgers are written, and periodically hands
// snapshots of that state to a BackendInterface for persistence. The writes
// are posted to ioc_, which is run by ioThread_.
//
// NOTE(review): the add/delete/finish methods touch the containers without
// taking mutex_, so they appear to assume a single calling thread — confirm.
class BackendIndexer
{
// Queue for the snapshot writes posted by finish(); run on ioThread_.
boost::asio::io_context ioc_;
// Taken in the destructor while the work object is released.
std::mutex mutex_;
// Emplaced in the constructor before ioc_.run() starts and reset in the
// destructor.
std::optional<boost::asio::io_context::work> work_;
// Thread started by the constructor that calls ioc_.run(); joined in the
// destructor.
std::thread ioThread_;
// Keys are written when the ledger sequence is a multiple of 2^keyShift_.
// The constructor overwrites this default with the "keyshift" config value.
uint32_t keyShift_ = 16;
// Books are written when the ledger sequence is a multiple of 2^bookShift_.
// The constructor overwrites this default with the "bookshift" config value.
uint32_t bookShift_ = 16;
// Keys added via addKey() and not yet removed via deleteKey().
std::unordered_set<ripple::uint256> keys;
// book -> offer keys added via addBookOffer() and not yet deleted.
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>
booksToOffers;
// book -> offer keys removed via deleteBookOffer(); emptied by finish() each
// time the books are written.
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>
booksToDeletedOffers;
public:
// Reads "keyshift" and "bookshift" from config and starts the worker thread.
BackendIndexer(boost::json::object const& config);
// Releases the work object and joins the worker thread.
~BackendIndexer();
// Inserts key into the tracked key set.
void
addKey(ripple::uint256 const& key);
// Erases key from the tracked key set.
void
deleteKey(ripple::uint256 const& key);
// Records offerKey as an offer of book.
void
addBookOffer(ripple::uint256 const& book, ripple::uint256 const& offerKey);
// Removes offerKey from book's offers and records it as deleted.
void
deleteBookOffer(
ripple::uint256 const& book,
ripple::uint256 const& offerKey);
// Posts snapshot writes to backend when ledgerSequence is aligned to the
// configured key and/or book shift. backend is captured by reference by the
// posted tasks.
void
finish(uint32_t ledgerSequence, BackendInterface const& backend);
};
class BackendInterface class BackendInterface
{ {
private:
mutable BackendIndexer indexer_;
public: public:
// read methods // read methods
// Constructs the shared part of every backend by passing `config` on to the
// embedded indexer.
BackendInterface(boost::json::object const& config)
    : indexer_(config)
{
}
virtual std::optional<uint32_t> virtual std::optional<uint32_t>
fetchLatestLedgerSequence() const = 0; fetchLatestLedgerSequence() const = 0;
@@ -107,8 +159,37 @@ public:
std::string&& ledgerHeader, std::string&& ledgerHeader,
bool isFirst = false) const = 0; bool isFirst = false) const = 0;
virtual void void
writeLedgerObject( writeLedgerObject(
std::string&& key,
uint32_t seq,
std::string&& blob,
bool isCreated,
bool isDeleted,
std::optional<ripple::uint256>&& book) const
{
ripple::uint256 key256 = ripple::uint256::fromVoid(key.data());
if (isCreated)
indexer_.addKey(key256);
if (isDeleted)
indexer_.deleteKey(key256);
if (book)
{
if (isCreated)
indexer_.addBookOffer(*book, key256);
if (isDeleted)
indexer_.deleteBookOffer(*book, key256);
}
doWriteLedgerObject(
std::move(key),
seq,
std::move(blob),
isCreated,
isDeleted,
std::move(book));
}
virtual void
doWriteLedgerObject(
std::string&& key, std::string&& key,
uint32_t seq, uint32_t seq,
std::string&& blob, std::string&& blob,
@@ -141,11 +222,27 @@ public:
virtual void virtual void
startWrites() const = 0; startWrites() const = 0;
// Ends the write batch for `ledgerSequence`. The indexer is notified first,
// then the backend-specific doFinishWrites() runs and its result is returned.
bool
finishWrites(uint32_t ledgerSequence) const
{
    indexer_.finish(ledgerSequence, *this);
    bool const finished = doFinishWrites();
    return finished;
}
virtual bool virtual bool
finishWrites() const = 0; doFinishWrites() const = 0;
virtual bool virtual bool
doOnlineDelete(uint32_t minLedgerToKeep) const = 0; doOnlineDelete(uint32_t minLedgerToKeep) const = 0;
// Writes the given set of ledger object keys for ledgerSequence.
// BackendIndexer::finish() invokes this from a task posted to the indexer's
// io_context and ignores the returned value.
// NOTE(review): the meaning of a false return is not defined in this header —
// confirm against the concrete backends.
virtual bool
writeKeys(
std::unordered_set<ripple::uint256> const& keys,
uint32_t ledgerSequence) const = 0;
// Writes the given book -> offer-key mapping for ledgerSequence.
// BackendIndexer::finish() invokes this twice per book flag ledger (live
// offers, then deleted offers) from a task posted to the indexer's io_context
// and ignores the returned value.
// NOTE(review): the meaning of a false return is not defined in this header —
// confirm against the concrete backends.
virtual bool
writeBooks(
std::unordered_map<
ripple::uint256,
std::unordered_set<ripple::uint256>> const& books,
uint32_t ledgerSequence) const = 0;
virtual ~BackendInterface() virtual ~BackendInterface()
{ {

View File

@@ -2,6 +2,7 @@
#include <reporting/CassandraBackend.h> #include <reporting/CassandraBackend.h>
#include <reporting/DBHelpers.h> #include <reporting/DBHelpers.h>
#include <unordered_map> #include <unordered_map>
/*
namespace std { namespace std {
template <> template <>
struct hash<ripple::uint256> struct hash<ripple::uint256>
@@ -13,6 +14,7 @@ struct hash<ripple::uint256>
} }
}; };
} // namespace std } // namespace std
*/
namespace Backend { namespace Backend {
template <class T, class F> template <class T, class F>
void void
@@ -842,7 +844,7 @@ writeKeyCallback(CassFuture* fut, void* cbData)
bool bool
CassandraBackend::writeKeys( CassandraBackend::writeKeys(
std::unordered_set<ripple::uint256>& keys, std::unordered_set<ripple::uint256> const& keys,
uint32_t ledgerSequence) const uint32_t ledgerSequence) const
{ {
BOOST_LOG_TRIVIAL(info) BOOST_LOG_TRIVIAL(info)
@@ -889,26 +891,27 @@ CassandraBackend::writeKeys(
bool bool
CassandraBackend::writeBooks( CassandraBackend::writeBooks(
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>& std::unordered_map<
books, ripple::uint256,
uint32_t ledgerSequence, std::unordered_set<ripple::uint256>> const& books,
uint32_t numOffers) const uint32_t ledgerSequence) const
{ {
BOOST_LOG_TRIVIAL(info) BOOST_LOG_TRIVIAL(info)
<< __func__ << " Ledger = " << std::to_string(ledgerSequence) << __func__ << " Ledger = " << std::to_string(ledgerSequence)
<< " . num books = " << std::to_string(books.size()) << " . num books = " << std::to_string(books.size());
<< " . num offers = " << std::to_string(numOffers);
std::atomic_uint32_t numRemaining = numOffers;
std::condition_variable cv; std::condition_variable cv;
std::mutex mtx; std::mutex mtx;
std::vector<std::shared_ptr<WriteBookCallbackData>> cbs; std::vector<std::shared_ptr<WriteBookCallbackData>> cbs;
uint32_t concurrentLimit = maxRequestsOutstanding / 2; uint32_t concurrentLimit = maxRequestsOutstanding / 2;
uint32_t numSubmitted = 0; std::atomic_uint32_t numOutstanding = 0;
size_t count = 0;
auto start = std::chrono::system_clock::now(); auto start = std::chrono::system_clock::now();
for (auto& book : books) for (auto& book : books)
{ {
for (auto& offer : book.second) for (auto& offer : book.second)
{ {
++numOutstanding;
++count;
cbs.push_back(std::make_shared<WriteBookCallbackData>( cbs.push_back(std::make_shared<WriteBookCallbackData>(
*this, *this,
book.first, book.first,
@@ -916,40 +919,25 @@ CassandraBackend::writeBooks(
ledgerSequence, ledgerSequence,
cv, cv,
mtx, mtx,
numRemaining)); numOutstanding));
writeBook2(*cbs.back()); writeBook2(*cbs.back());
++numSubmitted;
BOOST_LOG_TRIVIAL(trace) << __func__ << "Submitted a write request"; BOOST_LOG_TRIVIAL(trace) << __func__ << "Submitted a write request";
std::unique_lock<std::mutex> lck(mtx); std::unique_lock<std::mutex> lck(mtx);
BOOST_LOG_TRIVIAL(trace) << __func__ << "Got the mutex"; BOOST_LOG_TRIVIAL(trace) << __func__ << "Got the mutex";
cv.wait( cv.wait(lck, [&numOutstanding, concurrentLimit]() {
lck, return numOutstanding < concurrentLimit;
[&numRemaining, numSubmitted, concurrentLimit, numOffers]() {
BOOST_LOG_TRIVIAL(trace)
<< std::to_string(numSubmitted) << " "
<< std::to_string(numRemaining) << " "
<< std::to_string(numOffers) << " "
<< std::to_string(concurrentLimit);
return (numSubmitted - (numOffers - numRemaining)) <
concurrentLimit;
}); });
if (numSubmitted % 1000 == 0)
BOOST_LOG_TRIVIAL(debug)
<< __func__ << " Submitted " << std::to_string(numSubmitted)
<< " write requests. Completed "
<< (numOffers - numRemaining);
} }
} }
BOOST_LOG_TRIVIAL(info) << __func__ BOOST_LOG_TRIVIAL(info) << __func__
<< "Submitted all book writes. Waiting for them to " << "Submitted all book writes. Waiting for them to "
"finish. num submitted = " "finish. num submitted = "
<< std::to_string(numSubmitted); << std::to_string(count);
std::unique_lock<std::mutex> lck(mtx); std::unique_lock<std::mutex> lck(mtx);
cv.wait(lck, [&numRemaining]() { return numRemaining == 0; }); cv.wait(lck, [&numOutstanding]() { return numOutstanding == 0; });
BOOST_LOG_TRIVIAL(info) << __func__ << "Finished writing books"; BOOST_LOG_TRIVIAL(info) << __func__ << "Finished writing books";
return true; return true;
} }
bool bool
CassandraBackend::isIndexed(uint32_t ledgerSequence) const CassandraBackend::isIndexed(uint32_t ledgerSequence) const
{ {
@@ -986,6 +974,8 @@ CassandraBackend::getNextToIndex() const
bool bool
CassandraBackend::runIndexer(uint32_t ledgerSequence) const CassandraBackend::runIndexer(uint32_t ledgerSequence) const
{ {
return false;
/*
auto start = std::chrono::system_clock::now(); auto start = std::chrono::system_clock::now();
constexpr uint32_t limit = 2048; constexpr uint32_t limit = 2048;
std::unordered_set<ripple::uint256> keys; std::unordered_set<ripple::uint256> keys;
@@ -1091,7 +1081,6 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const
size_t numOffersDeleted = 0; size_t numOffersDeleted = 0;
// Get the diff and update keys // Get the diff and update keys
std::vector<LedgerObject> objs; std::vector<LedgerObject> objs;
std::unordered_set<ripple::uint256> deleted;
std::vector<uint32_t> sequences(256, 0); std::vector<uint32_t> sequences(256, 0);
std::iota(sequences.begin(), sequences.end(), i + 1); std::iota(sequences.begin(), sequences.end(), i + 1);
@@ -1104,7 +1093,6 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const
if (obj.blob.size() == 0) if (obj.blob.size() == 0)
{ {
keys.erase(obj.key); keys.erase(obj.key);
deleted.insert(obj.key);
if (offers.count(obj.key) > 0) if (offers.count(obj.key) > 0)
{ {
auto book = offers[obj.key]; auto book = offers[obj.key];
@@ -1115,8 +1103,8 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const
} }
else else
{ {
// insert other keys. keys is a set, so this is a noop // insert other keys. keys is a set, so this is a
// if obj.key is already in keys // noop if obj.key is already in keys
keys.insert(obj.key); keys.insert(obj.key);
// if the object is an offer, add to books // if the object is an offer, add to books
if (isOffer(obj.blob)) if (isOffer(obj.blob))
@@ -1167,8 +1155,8 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const
nextLedgerSequence = prevLedgerSequence + (1 << indexerShift_); nextLedgerSequence = prevLedgerSequence + (1 << indexerShift_);
} }
return true; return true;
*/
} }
bool bool
CassandraBackend::doOnlineDelete(uint32_t minLedgerToKeep) const CassandraBackend::doOnlineDelete(uint32_t minLedgerToKeep) const
{ {
@@ -1469,7 +1457,8 @@ CassandraBackend::open()
query = {}; query = {};
query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "books2" query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "books2"
<< " ( book blob, sequence bigint, key blob, PRIMARY KEY " << " ( book blob, sequence bigint, key blob, PRIMARY KEY "
"((book, sequence), key)) WITH CLUSTERING ORDER BY (key ASC)"; "((book, sequence), key)) WITH CLUSTERING ORDER BY (key "
"ASC)";
if (!executeSimpleStatement(query.str())) if (!executeSimpleStatement(query.str()))
continue; continue;
query = {}; query = {};
@@ -1783,5 +1772,5 @@ CassandraBackend::open()
open_ = true; open_ = true;
BOOST_LOG_TRIVIAL(info) << "Opened database successfully"; BOOST_LOG_TRIVIAL(info) << "Opened database successfully";
} } // namespace Backend
} // namespace Backend } // namespace Backend

View File

@@ -655,7 +655,8 @@ private:
mutable bool isFirstLedger_ = false; mutable bool isFirstLedger_ = false;
public: public:
CassandraBackend(boost::json::object const& config) : config_(config) CassandraBackend(boost::json::object const& config)
: BackendInterface(config), config_(config)
{ {
} }
@@ -798,7 +799,7 @@ public:
}; };
bool bool
finishWrites() const override doFinishWrites() const override
{ {
// wait for all other writes to finish // wait for all other writes to finish
sync(); sync();
@@ -973,15 +974,14 @@ public:
bool bool
writeKeys( writeKeys(
std::unordered_set<ripple::uint256>& keys, std::unordered_set<ripple::uint256> const& keys,
uint32_t ledgerSequence) const; uint32_t ledgerSequence) const;
bool bool
writeBooks( writeBooks(
std::unordered_map< std::unordered_map<
ripple::uint256, ripple::uint256,
std::unordered_set<ripple::uint256>>& books, std::unordered_set<ripple::uint256>> const& books,
uint32_t ledgerSequence, uint32_t ledgerSequence) const override;
uint32_t numOffers) const;
std::pair<std::vector<LedgerObject>, std::optional<ripple::uint256>> std::pair<std::vector<LedgerObject>, std::optional<ripple::uint256>>
fetchBookOffers( fetchBookOffers(
ripple::uint256 const& book, ripple::uint256 const& book,
@@ -1270,7 +1270,7 @@ public:
executeAsyncWrite(statement, flatMapWriteBookCallback, data, isRetry); executeAsyncWrite(statement, flatMapWriteBookCallback, data, isRetry);
} }
void void
writeLedgerObject( doWriteLedgerObject(
std::string&& key, std::string&& key,
uint32_t seq, uint32_t seq,
std::string&& blob, std::string&& blob,

View File

@@ -3,7 +3,9 @@
namespace Backend { namespace Backend {
PostgresBackend::PostgresBackend(boost::json::object const& config) PostgresBackend::PostgresBackend(boost::json::object const& config)
: pgPool_(make_PgPool(config)), writeConnection_(pgPool_) : BackendInterface(config)
, pgPool_(make_PgPool(config))
, writeConnection_(pgPool_)
{ {
} }
void void
@@ -50,7 +52,7 @@ PostgresBackend::writeAccountTransactions(
} }
} }
void void
PostgresBackend::writeLedgerObject( PostgresBackend::doWriteLedgerObject(
std::string&& key, std::string&& key,
uint32_t seq, uint32_t seq,
std::string&& blob, std::string&& blob,
@@ -553,7 +555,7 @@ PostgresBackend::startWrites() const
} }
bool bool
PostgresBackend::finishWrites() const PostgresBackend::doFinishWrites() const
{ {
if (!abortWrite_) if (!abortWrite_)
{ {
@@ -584,6 +586,66 @@ PostgresBackend::finishWrites() const
return !abortWrite_; return !abortWrite_;
} }
bool bool
PostgresBackend::writeKeys(
std::unordered_set<ripple::uint256> const& keys,
uint32_t ledgerSequence) const
{
PgQuery pgQuery(pgPool_);
std::stringstream keysBuffer;
size_t numRows = 0;
for (auto& key : keys)
{
keysBuffer << std::to_string(ledgerSequence) << '\t' << "\\\\x"
<< ripple::strHex(key) << '\n';
numRows++;
// If the buffer gets too large, the insert fails. Not sure why. So we
// insert after 1 million records
if (numRows == 1000000)
{
pgQuery.bulkInsert("keys", keysBuffer.str());
keysBuffer = {};
numRows = 0;
}
}
if (numRows > 0)
{
pgQuery.bulkInsert("keys", keysBuffer.str());
}
}
// Bulk-inserts one row per (book, offer) pair into the "books" table. Each
// row is the book as a hex bytea literal, the ledger sequence, and the offer
// key as a hex bytea literal, tab-separated.
//
// @param books           map from book to the offer keys to write for it
// @param ledgerSequence  sequence stored alongside every row
// @return true once every batch has been handed to bulkInsert
bool
PostgresBackend::writeBooks(
    std::unordered_map<
        ripple::uint256,
        std::unordered_set<ripple::uint256>> const& books,
    uint32_t ledgerSequence) const
{
    // If the buffer gets too large, the insert fails. Not sure why. So
    // we insert after 1 million records
    constexpr size_t maxRowsPerInsert = 1000000;

    PgQuery pgQuery(pgPool_);
    // The sequence is the same for every row, so format it once.
    std::string const sequenceText = std::to_string(ledgerSequence);
    std::stringstream booksBuffer;
    size_t numRows = 0;
    for (auto const& book : books)
    {
        for (auto const& offer : book.second)
        {
            booksBuffer << "\\\\x" << ripple::strHex(book.first) << '\t'
                        << sequenceText << '\t' << "\\\\x"
                        << ripple::strHex(offer) << '\n';
            ++numRows;
            if (numRows == maxRowsPerInsert)
            {
                pgQuery.bulkInsert("books", booksBuffer.str());
                booksBuffer = {};
                numRows = 0;
            }
        }
    }
    // Flush the final partial batch, if any.
    if (numRows > 0)
    {
        pgQuery.bulkInsert("books", booksBuffer.str());
    }
    // The function is declared bool; flowing off the end without a return is
    // undefined behaviour.
    // NOTE(review): how bulkInsert reports failure is not visible here —
    // confirm that returning true unconditionally is appropriate.
    return true;
}
bool
PostgresBackend::doOnlineDelete(uint32_t minLedgerToKeep) const PostgresBackend::doOnlineDelete(uint32_t minLedgerToKeep) const
{ {
uint32_t limit = 2048; uint32_t limit = 2048;

View File

@@ -79,7 +79,7 @@ public:
bool isFirst) const override; bool isFirst) const override;
void void
writeLedgerObject( doWriteLedgerObject(
std::string&& key, std::string&& key,
uint32_t seq, uint32_t seq,
std::string&& blob, std::string&& blob,
@@ -108,10 +108,20 @@ public:
startWrites() const override; startWrites() const override;
bool bool
finishWrites() const override; doFinishWrites() const override;
bool bool
doOnlineDelete(uint32_t minLedgerToKeep) const override; doOnlineDelete(uint32_t minLedgerToKeep) const override;
bool
writeKeys(
std::unordered_set<ripple::uint256> const& keys,
uint32_t ledgerSequence) const override;
bool
writeBooks(
std::unordered_map<
ripple::uint256,
std::unordered_set<ripple::uint256>> const& books,
uint32_t ledgerSequence) const override;
}; };
} // namespace Backend } // namespace Backend
#endif #endif

View File

@@ -131,7 +131,7 @@ ReportingETL::loadInitialLedger(uint32_t startingSequence)
{ {
flatMapBackend_->writeAccountTransactions(std::move(accountTxData)); flatMapBackend_->writeAccountTransactions(std::move(accountTxData));
} }
flatMapBackend_->finishWrites(); flatMapBackend_->finishWrites(startingSequence);
auto end = std::chrono::system_clock::now(); auto end = std::chrono::system_clock::now();
BOOST_LOG_TRIVIAL(debug) << "Time to download and store ledger = " BOOST_LOG_TRIVIAL(debug) << "Time to download and store ledger = "
<< ((end - start).count()) / 1000000000.0; << ((end - start).count()) / 1000000000.0;
@@ -298,7 +298,7 @@ ReportingETL::buildNextLedger(org::xrpl::rpc::v1::GetLedgerResponse& rawData)
std::move(bookDir)); std::move(bookDir));
} }
flatMapBackend_->writeAccountTransactions(std::move(accountTxData)); flatMapBackend_->writeAccountTransactions(std::move(accountTxData));
bool success = flatMapBackend_->finishWrites(); bool success = flatMapBackend_->finishWrites(lgrInfo.seq);
BOOST_LOG_TRIVIAL(debug) BOOST_LOG_TRIVIAL(debug)
<< __func__ << " : " << __func__ << " : "
<< "Inserted/modified/deleted all objects. Number of objects = " << "Inserted/modified/deleted all objects. Number of objects = "