Introduce ShardArchiveHandler improvements:

* Improve documentation
* Make the ShardArchiveHandler rather than the DatabaseShardImp perform
  LastLedgerHash verification for downloaded shards
* Remove ShardArchiveHandler's singleton implementation and make it an
  Application member
* Have the Application invoke ShardArchiveHandler initialization
  instead of clients
* Add RecoveryHandler as a ShardArchiveHandler derived class
* Improve commenting
This commit is contained in:
Devon White
2020-05-15 17:21:16 -04:00
committed by manojsdoshi
parent 21340a1c1e
commit ac766ec0eb
22 changed files with 977 additions and 926 deletions

View File

@@ -513,7 +513,6 @@ target_sources (rippled PRIVATE
src/ripple/nodestore/impl/EncodedBlob.cpp src/ripple/nodestore/impl/EncodedBlob.cpp
src/ripple/nodestore/impl/ManagerImp.cpp src/ripple/nodestore/impl/ManagerImp.cpp
src/ripple/nodestore/impl/NodeObject.cpp src/ripple/nodestore/impl/NodeObject.cpp
src/ripple/nodestore/impl/RetryFinalize.cpp
src/ripple/nodestore/impl/Shard.cpp src/ripple/nodestore/impl/Shard.cpp
src/ripple/nodestore/impl/TaskQueue.cpp src/ripple/nodestore/impl/TaskQueue.cpp
#[===============================[ #[===============================[
@@ -623,6 +622,7 @@ target_sources (rippled PRIVATE
src/ripple/rpc/impl/Role.cpp src/ripple/rpc/impl/Role.cpp
src/ripple/rpc/impl/ServerHandlerImp.cpp src/ripple/rpc/impl/ServerHandlerImp.cpp
src/ripple/rpc/impl/ShardArchiveHandler.cpp src/ripple/rpc/impl/ShardArchiveHandler.cpp
src/ripple/rpc/impl/ShardVerificationScheduler.cpp
src/ripple/rpc/impl/Status.cpp src/ripple/rpc/impl/Status.cpp
src/ripple/rpc/impl/TransactionSign.cpp src/ripple/rpc/impl/TransactionSign.cpp
@@ -837,7 +837,7 @@ target_sources (rippled PRIVATE
test sources: test sources:
subdir: net subdir: net
#]===============================] #]===============================]
src/test/net/SSLHTTPDownloader_test.cpp src/test/net/DatabaseDownloader_test.cpp
#[===============================[ #[===============================[
test sources: test sources:
subdir: nodestore subdir: nodestore

View File

@@ -349,6 +349,7 @@ public:
detail::AppFamily family_; detail::AppFamily family_;
std::unique_ptr<NodeStore::DatabaseShard> shardStore_; std::unique_ptr<NodeStore::DatabaseShard> shardStore_;
std::unique_ptr<detail::AppFamily> shardFamily_; std::unique_ptr<detail::AppFamily> shardFamily_;
std::unique_ptr<RPC::ShardArchiveHandler> shardArchiveHandler_;
// VFALCO TODO Make OrderBookDB abstract // VFALCO TODO Make OrderBookDB abstract
OrderBookDB m_orderBookDB; OrderBookDB m_orderBookDB;
std::unique_ptr<PathRequests> m_pathRequests; std::unique_ptr<PathRequests> m_pathRequests;
@@ -786,6 +787,64 @@ public:
return shardStore_.get(); return shardStore_.get();
} }
// Lazily constructs and returns the application's ShardArchiveHandler,
// or nullptr on failure.
//
// @param tryRecovery When true, attempt to build a recovery handler
//        (resuming downloads from a previous run) instead of a fresh
//        one; in that mode a pre-existing handler is treated as an
//        error and nullptr is returned.
// @return Raw, non-owning pointer to the handler (owned by
//         shardArchiveHandler_), or nullptr if construction or
//         initialization failed.
RPC::ShardArchiveHandler*
getShardArchiveHandler(bool tryRecovery) override
{
// Serializes concurrent callers so only one handler is ever
// created. Function-local static: the mutex guards this method
// only, not other access to shardArchiveHandler_.
static std::mutex handlerMutex;
std::lock_guard lock(handlerMutex);
// After constructing the handler, try to
// initialize it. Log on error; set the
// member variable on success.
auto initAndSet =
[this](std::unique_ptr<RPC::ShardArchiveHandler>&& handler) {
if (!handler)
return false;
if (!handler->init())
{
JLOG(m_journal.error())
<< "Failed to initialize ShardArchiveHandler.";
return false;
}
shardArchiveHandler_ = std::move(handler);
return true;
};
// Need to resume based on state from a previous
// run.
if (tryRecovery)
{
// A handler should not already exist when probing for
// recovery at startup; treat that as a logic error.
if (shardArchiveHandler_ != nullptr)
{
JLOG(m_journal.error())
<< "ShardArchiveHandler already created at startup.";
return nullptr;
}
// NOTE(review): presumably tryMakeRecoveryHandler returns
// nullptr when there is no leftover state to recover —
// confirm against its definition.
auto handler = RPC::ShardArchiveHandler::tryMakeRecoveryHandler(
*this, *m_jobQueue);
// Returns nullptr both when no recovery handler was produced
// and when initialization of one failed.
if (!initAndSet(std::move(handler)))
return nullptr;
}
// Construct the ShardArchiveHandler
if (shardArchiveHandler_ == nullptr)
{
auto handler = RPC::ShardArchiveHandler::makeShardArchiveHandler(
*this, *m_jobQueue);
if (!initAndSet(std::move(handler)))
return nullptr;
}
return shardArchiveHandler_.get();
}
Application::MutexType& Application::MutexType&
getMasterMutex() override getMasterMutex() override
{ {
@@ -1714,30 +1773,16 @@ ApplicationImp::setup()
if (shardStore_) if (shardStore_)
{ {
using namespace boost::filesystem;
auto stateDb(
RPC::ShardArchiveHandler::getDownloadDirectory(*config_) /
stateDBName);
try try
{ {
if (exists(stateDb) && is_regular_file(stateDb) && // Create a ShardArchiveHandler if recovery
!RPC::ShardArchiveHandler::hasInstance()) // is needed (there's a state database left
// over from a previous run).
auto handler = getShardArchiveHandler(true);
// Recovery is needed.
if (handler)
{ {
auto handler = RPC::ShardArchiveHandler::recoverInstance(
*this, *m_jobQueue);
assert(handler);
if (!handler->initFromDB())
{
JLOG(m_journal.fatal())
<< "Failed to initialize ShardArchiveHandler.";
return false;
}
if (!handler->start()) if (!handler->start())
{ {
JLOG(m_journal.fatal()) JLOG(m_journal.fatal())

View File

@@ -46,6 +46,9 @@ class DatabaseShard;
namespace perf { namespace perf {
class PerfLog; class PerfLog;
} }
namespace RPC {
class ShardArchiveHandler;
}
// VFALCO TODO Fix forward declares required for header dependency loops // VFALCO TODO Fix forward declares required for header dependency loops
class AmendmentTable; class AmendmentTable;
@@ -185,6 +188,8 @@ public:
getNodeStore() = 0; getNodeStore() = 0;
virtual NodeStore::DatabaseShard* virtual NodeStore::DatabaseShard*
getShardStore() = 0; getShardStore() = 0;
virtual RPC::ShardArchiveHandler*
getShardArchiveHandler(bool tryRecovery = false) = 0;
virtual InboundLedgers& virtual InboundLedgers&
getInboundLedgers() = 0; getInboundLedgers() = 0;
virtual InboundTransactions& virtual InboundTransactions&

View File

@@ -28,6 +28,10 @@
namespace ripple { namespace ripple {
// DatabaseBody needs to meet requirements
// from asio which is why some conventions
// used elsewhere in this code base are not
// followed.
struct DatabaseBody struct DatabaseBody
{ {
// Algorithm for storing buffers when parsing. // Algorithm for storing buffers when parsing.
@@ -53,15 +57,15 @@ class DatabaseBody::value_type
friend struct DatabaseBody; friend struct DatabaseBody;
// The cached file size // The cached file size
std::uint64_t file_size_ = 0; std::uint64_t fileSize_ = 0;
boost::filesystem::path path_; boost::filesystem::path path_;
std::unique_ptr<DatabaseCon> conn_; std::unique_ptr<DatabaseCon> conn_;
std::string batch_; std::string batch_;
std::shared_ptr<boost::asio::io_service::strand> strand_; std::shared_ptr<boost::asio::io_service::strand> strand_;
std::mutex m_; std::mutex m_;
std::condition_variable c_; std::condition_variable c_;
uint64_t handler_count_ = 0; std::uint64_t handlerCount_ = 0;
uint64_t part_ = 0; std::uint64_t part_ = 0;
bool closing_ = false; bool closing_ = false;
public: public:
@@ -75,14 +79,14 @@ public:
bool bool
is_open() const is_open() const
{ {
return bool{conn_}; return static_cast<bool>(conn_);
} }
/// Returns the size of the file if open /// Returns the size of the file if open
std::uint64_t std::uint64_t
size() const size() const
{ {
return file_size_; return fileSize_;
} }
/// Close the file if open /// Close the file if open
@@ -93,7 +97,9 @@ public:
@param path The utf-8 encoded path to the file @param path The utf-8 encoded path to the file
@param mode The file mode to use @param config The configuration settings
@param io_service The asio context for running a strand.
@param ec Set to the error, if any occurred @param ec Set to the error, if any occurred
*/ */
@@ -114,9 +120,9 @@ class DatabaseBody::reader
{ {
value_type& body_; // The body we are writing to value_type& body_; // The body we are writing to
static const uint32_t FLUSH_SIZE = 50000000; static constexpr std::uint32_t FLUSH_SIZE = 50000000;
static const uint8_t MAX_HANDLERS = 3; static constexpr std::uint8_t MAX_HANDLERS = 3;
static const uint16_t MAX_ROW_SIZE_PAD = 500; static constexpr std::uint16_t MAX_ROW_SIZE_PAD = 500;
public: public:
// Constructor. // Constructor.

View File

@@ -34,7 +34,8 @@ public:
Config const& config); Config const& config);
private: private:
static const uint8_t MAX_PATH_LEN = std::numeric_limits<uint8_t>::max(); static const std::uint8_t MAX_PATH_LEN =
std::numeric_limits<std::uint8_t>::max();
std::shared_ptr<parser> std::shared_ptr<parser>
getParser( getParser(
@@ -48,7 +49,7 @@ private:
void void
closeBody(std::shared_ptr<parser> p) override; closeBody(std::shared_ptr<parser> p) override;
uint64_t std::uint64_t
size(std::shared_ptr<parser> p) override; size(std::shared_ptr<parser> p) override;
Config const& config_; Config const& config_;

View File

@@ -41,7 +41,7 @@ namespace ripple {
/** Provides an asynchronous HTTPS file downloader /** Provides an asynchronous HTTPS file downloader
*/ */
class SSLHTTPDownloader : public std::enable_shared_from_this<SSLHTTPDownloader> class SSLHTTPDownloader
{ {
public: public:
using error_code = boost::system::error_code; using error_code = boost::system::error_code;
@@ -49,8 +49,7 @@ public:
SSLHTTPDownloader( SSLHTTPDownloader(
boost::asio::io_service& io_service, boost::asio::io_service& io_service,
beast::Journal j, beast::Journal j,
Config const& config, Config const& config);
bool isPaused = false);
bool bool
download( download(
@@ -71,13 +70,13 @@ protected:
beast::Journal const j_; beast::Journal const j_;
bool void
fail( fail(
boost::filesystem::path dstPath, boost::filesystem::path dstPath,
std::function<void(boost::filesystem::path)> const& complete, std::function<void(boost::filesystem::path)> const& complete,
boost::system::error_code const& ec, boost::system::error_code const& ec,
std::string const& errMsg, std::string const& errMsg,
std::shared_ptr<parser> parser = nullptr); std::shared_ptr<parser> parser);
private: private:
HTTPClientSSLContext ssl_ctx_; HTTPClientSSLContext ssl_ctx_;
@@ -85,9 +84,11 @@ private:
boost::optional<boost::asio::ssl::stream<boost::asio::ip::tcp::socket>> boost::optional<boost::asio::ssl::stream<boost::asio::ip::tcp::socket>>
stream_; stream_;
boost::beast::flat_buffer read_buf_; boost::beast::flat_buffer read_buf_;
std::atomic<bool> isStopped_; std::atomic<bool> cancelDownloads_;
bool sessionActive_;
// Used to protect sessionActive_
std::mutex m_; std::mutex m_;
bool sessionActive_;
std::condition_variable c_; std::condition_variable c_;
void void

View File

@@ -1,17 +1,18 @@
# Shard Downloader Process # Shard Downloader
## Overview ## Overview
This document describes the mechanics of the `SSLHTTPDownloader`, a class that performs the task of downloading shards from remote web servers via This document describes the mechanics of the `SSLHTTPDownloader`, a class that performs the task of downloading shards from remote web servers via
SSL HTTP. The downloader utilizes a strand (`boost::asio::io_service::strand`) to ensure that downloads are never executed concurrently. Hence, if a download is in progress when another download is initiated, the second download will be queued and invoked only when the first download is completed. SSL HTTP. The downloader utilizes a strand (`boost::asio::io_service::strand`) to ensure that downloads are never executed concurrently. Hence, if a download is in progress when another download is initiated, the second download will be queued and invoked only when the first download is completed.
## New Features ## Motivation
The downloader has been recently (March 2020) been modified to provide some key features: In March 2020 the downloader was modified to include some key features:
- The ability to stop downloads during a graceful shutdown. - The ability to stop downloads during a graceful shutdown.
- The ability to resume partial downloads after a crash or shutdown. - The ability to resume partial downloads after a crash or shutdown.
- <span style="color:gray">*(Deferred) The ability to download from multiple servers to a single file.*</span>
This document was created to document the changes introduced by this change.
## Classes ## Classes
@@ -42,17 +43,15 @@ Much of the shard downloading process concerns the following classes:
start(); start();
``` ```
When a client submits a `download_shard` command via the RPC interface, each of the requested files is registered with the handler via the `add` method. After all the files have been registered, the handler's `start` method is invoked, which in turn creates an instance of the `SSLHTTPDownloader` and begins the first download. When the download is completed, the downloader invokes the handler's `complete` method, which will initiate the download of the next file, or simply return if there are no more downloads to process. When `complete` is invoked with no remaining files to be downloaded, the handler and downloader are not destroyed automatically, but persist for the duration of the application. When a client submits a `download_shard` command via the RPC interface, each of the requested files is registered with the handler via the `add` method. After all the files have been registered, the handler's `start` method is invoked, which in turn creates an instance of the `SSLHTTPDownloader` and begins the first download. When the download is completed, the downloader invokes the handler's `complete` method, which will initiate the download of the next file, or simply return if there are no more downloads to process. When `complete` is invoked with no remaining files to be downloaded, the handler and downloader are not destroyed automatically, but persist for the duration of the application to assist with graceful shutdowns by `Stoppable`.
Additionally, we leverage a novel class to provide customized parsing for downloaded files:
- `DatabaseBody` - `DatabaseBody`
This class will define a custom message body type, allowing an `http::response_parser` to write to a SQLite database rather than to a flat file. This class is discussed in further detail in the Recovery section. This class defines a custom message body type, allowing an `http::response_parser` to write to an SQLite database rather than to a flat file. This class is discussed in further detail in the Recovery section.
## Execution Concept ## Graceful Shutdowns & Recovery
This section describes in greater detail how the key features of the downloader are implemented in C++ using the `boost::asio` framework. This section describes in greater detail how the shutdown and recovery features of the downloader are implemented in C++ using the `boost::asio` framework.
##### Member Variables: ##### Member Variables:
@@ -65,7 +64,7 @@ using boost::asio::ip::tcp::socket;
stream<socket> stream_; stream<socket> stream_;
std::condition_variable c_; std::condition_variable c_;
std::atomic<bool> isStopped_; std::atomic<bool> cancelDownloads_;
``` ```
### Graceful Shutdowns ### Graceful Shutdowns
@@ -90,7 +89,7 @@ ShardArchiveHandler::onStop()
} }
``` ```
Inside of `SSLHTTPDownloader::onStop()`, if a download is currently in progress, the `isStopped_` member variable is set and the thread waits for the download to stop: Inside of `SSLHTTPDownloader::onStop()`, if a download is currently in progress, the `cancelDownloads_` member variable is set and the thread waits for the download to stop:
```c++ ```c++
void void
@@ -98,7 +97,7 @@ SSLHTTPDownloader::onStop()
{ {
std::unique_lock lock(m_); std::unique_lock lock(m_);
isStopped_ = true; cancelDownloads_ = true;
if(sessionActive_) if(sessionActive_)
{ {
@@ -114,28 +113,23 @@ SSLHTTPDownloader::onStop()
##### Thread 2: ##### Thread 2:
The graceful shutdown is realized when the thread executing the download polls `isStopped_` after this variable has been set to `true`. Polling only occurs while the file is being downloaded, in between calls to `async_read_some()`. The stop takes effect when the socket is closed and the handler function ( `do_session()` ) is exited. The graceful shutdown is realized when the thread executing the download polls `cancelDownloads_` after this variable has been set to `true`. Polling occurs while the file is being downloaded, in between calls to `async_read_some()`. The stop takes effect when the socket is closed and the handler function ( `do_session()` ) is exited.
```c++ ```c++
void SSLHTTPDownloader::do_session() void SSLHTTPDownloader::do_session()
{ {
// (Connection initialization logic) // (Connection initialization logic) . . .
.
.
.
// (In between calls to async_read_some): // (In between calls to async_read_some):
if(isStopped_.load()) if(cancelDownloads_.load())
{ {
close(p); close(p);
return exit(); return exit();
} }
. // . . .
.
.
break; break;
} }
@@ -143,11 +137,11 @@ void SSLHTTPDownloader::do_session()
### Recovery ### Recovery
Persisting the current state of both the archive handler and the downloader is achieved by leveraging a SQLite database rather than flat files, as the database protects against data corruption that could result from a system crash. Persisting the current state of both the archive handler and the downloader is achieved by leveraging an SQLite database rather than flat files, as the database protects against data corruption that could result from a system crash.
##### ShardArchiveHandler ##### ShardArchiveHandler
Although `SSLHTTPDownloader` is a generic class that could be used to download a variety of file types, currently it is used exclusively by the `ShardArchiveHandler` to download shards. In order to provide resilience, the `ShardArchiveHandler` will utilize a SQLite database to preserve its current state whenever there are active, paused, or queued downloads. The `shard_db` section in the configuration file allows users to specify the location of the database to use for this purpose. Although `SSLHTTPDownloader` is a generic class that could be used to download a variety of file types, currently it is used exclusively by the `ShardArchiveHandler` to download shards. In order to provide resilience, the `ShardArchiveHandler` will use an SQLite database to preserve its current state whenever there are active, paused, or queued downloads. The `shard_db` section in the configuration file allows users to specify the location of the database to use for this purpose.
###### SQLite Table Format ###### SQLite Table Format
@@ -159,11 +153,11 @@ Although `SSLHTTPDownloader` is a generic class that could be used to download a
##### SSLHTTPDownloader ##### SSLHTTPDownloader
While the archive handler maintains a list of all partial and queued downloads, the `SSLHTTPDownloader` stores the raw bytes of the file currently being downloaded. The partially downloaded file will be represented as one or more `BLOB` entries in a SQLite database. As the maximum size of a `BLOB` entry is currently limited to roughly 2.1 GB, a 5 GB shard file for instance will occupy three database entries upon completion. While the archive handler maintains a list of all partial and queued downloads, the `SSLHTTPDownloader` stores the raw bytes of the file currently being downloaded. The partially downloaded file will be represented as one or more `BLOB` entries in an SQLite database. As the maximum size of a `BLOB` entry is currently limited to roughly 2.1 GB, a 5 GB shard file for instance will occupy three database entries upon completion.
###### SQLite Table Format ###### SQLite Table Format
Since downloads execute serially by design, the entries in this table always correspond to the content of a single file. Since downloads execute serially by design, the entries in this table always correspond to the contents of a single file.
| Bytes | Size | Part | | Bytes | Size | Part |
|:------:|:----------:|:----:| |:------:|:----------:|:----:|
@@ -172,7 +166,7 @@ Since downloads execute serially by design, the entries in this table always cor
| 0x... | 705032706 | 2 | | 0x... | 705032706 | 2 |
##### Config File Entry ##### Config File Entry
The `download_path` field of the `shard_db` entry will be used to determine where to store the recovery database. If this field is omitted, the `path` field will be used instead. The `download_path` field of the `shard_db` entry is used to determine where to store the recovery database. If this field is omitted, the `path` field will be used instead.
```dosini ```dosini
# This is the persistent datastore for shards. It is important for the health # This is the persistent datastore for shards. It is important for the health
@@ -187,7 +181,7 @@ max_size_gb=50
``` ```
##### Resuming Partial Downloads ##### Resuming Partial Downloads
When resuming downloads after a crash or other interruption, the `SSLHTTPDownloader` will utilize the `range` field of the HTTP header to download only the remainder of the partially downloaded file. When resuming downloads after a shutdown, crash, or other interruption, the `SSLHTTPDownloader` will utilize the `range` field of the HTTP header to download only the remainder of the partially downloaded file.
```C++ ```C++
auto downloaded = getPartialFileSize(); auto downloaded = getPartialFileSize();
@@ -199,14 +193,14 @@ http::request<http::file_body> req {http::verb::head,
if (downloaded < total) if (downloaded < total)
{ {
// If we already download 1000 bytes to the partial file, // If we already downloaded 1000 bytes to the database,
// the range header will look like: // the range header will look like:
// Range: "bytes=1000-" // Range: "bytes=1000-"
req.set(http::field::range, "bytes=" + to_string(downloaded) + "-"); req.set(http::field::range, "bytes=" + to_string(downloaded) + "-");
} }
else if(downloaded == total) else if(downloaded == total)
{ {
// Download is already complete. (Interruption Must // Download is already complete. (Interruption must
// have occurred after file was downloaded but before // have occurred after file was downloaded but before
// the state file was updated.) // the state file was updated.)
} }
@@ -242,6 +236,7 @@ struct body
} }
``` ```
Note that the `DatabaseBody` class is specifically designed to work with `asio` and follows `asio` conventions.
The method invoked to write data to the filesystem (or SQLite database in our case) has the following signature: The method invoked to write data to the filesystem (or SQLite database in our case) has the following signature:
@@ -252,7 +247,7 @@ body::reader::put(ConstBufferSequence const& buffers, error_code& ec);
## Sequence Diagram ## Sequence Diagram
This sequence diagram demonstrates a scenario wherein the `ShardArchiveHandler` leverages the state persisted in the database to recover from a crash and resume the scheduled downloads. This sequence diagram demonstrates a scenario wherein the `ShardArchiveHandler` leverages the state persisted in the database to recover from a crash and resume the requested downloads.
![alt_text](./images/interrupt_sequence.png "Resuming downloads post abort") ![alt_text](./images/interrupt_sequence.png "Resuming downloads post abort")

View File

@@ -27,11 +27,11 @@ DatabaseBody::value_type::close()
// Stop all scheduled and currently // Stop all scheduled and currently
// executing handlers before closing. // executing handlers before closing.
if (handler_count_) if (handlerCount_)
{ {
closing_ = true; closing_ = true;
auto predicate = [&] { return !handler_count_; }; auto predicate = [&] { return !handlerCount_; };
c_.wait(lock, predicate); c_.wait(lock, predicate);
} }
@@ -76,12 +76,12 @@ DatabaseBody::value_type::open(
// Continuing a file download. // Continuing a file download.
else else
{ {
boost::optional<uint64_t> size; boost::optional<std::uint64_t> size;
*db << "SELECT SUM(LENGTH(Data)) FROM Download;", soci::into(size); *db << "SELECT SUM(LENGTH(Data)) FROM Download;", soci::into(size);
if (size) if (size)
file_size_ = size.get(); fileSize_ = size.get();
} }
} }
} }
@@ -155,10 +155,10 @@ DatabaseBody::reader::put(
{ {
std::lock_guard lock(body_.m_); std::lock_guard lock(body_.m_);
if (body_.handler_count_ >= MAX_HANDLERS) if (body_.handlerCount_ >= MAX_HANDLERS)
post = false; post = false;
else else
++body_.handler_count_; ++body_.handlerCount_;
} }
if (post) if (post)
@@ -191,7 +191,7 @@ DatabaseBody::reader::do_put(std::string data)
// The download is being halted. // The download is being halted.
if (body_.closing_) if (body_.closing_)
{ {
if (--body_.handler_count_ == 0) if (--body_.handlerCount_ == 0)
{ {
lock.unlock(); lock.unlock();
body_.c_.notify_one(); body_.c_.notify_one();
@@ -202,10 +202,10 @@ DatabaseBody::reader::do_put(std::string data)
} }
auto path = body_.path_.string(); auto path = body_.path_.string();
uint64_t rowSize; std::uint64_t rowSize = 0;
soci::indicator rti; soci::indicator rti;
uint64_t remainingInRow; std::uint64_t remainingInRow = 0;
auto db = body_.conn_->checkoutDb(); auto db = body_.conn_->checkoutDb();
@@ -236,9 +236,9 @@ DatabaseBody::reader::do_put(std::string data)
else else
remainingInRow = blobMaxSize - rowSize; remainingInRow = blobMaxSize - rowSize;
auto insert = [&db, &rowSize, &part = body_.part_, &fs = body_.file_size_]( auto insert = [&db, &rowSize, &part = body_.part_, &fs = body_.fileSize_](
auto const& data) { auto const& data) {
uint64_t updatedSize = rowSize + data.size(); std::uint64_t updatedSize = rowSize + data.size();
*db << "UPDATE Download SET Data = CAST(Data || :data AS blob), " *db << "UPDATE Download SET Data = CAST(Data || :data AS blob), "
"Size = :size WHERE Part = :part;", "Size = :size WHERE Part = :part;",
@@ -263,7 +263,7 @@ DatabaseBody::reader::do_put(std::string data)
bool const notify = [this] { bool const notify = [this] {
std::lock_guard lock(body_.m_); std::lock_guard lock(body_.m_);
return --body_.handler_count_ == 0; return --body_.handlerCount_ == 0;
}(); }();
if (notify) if (notify)
@@ -279,9 +279,9 @@ DatabaseBody::reader::finish(boost::system::error_code& ec)
// Wait for scheduled DB writes // Wait for scheduled DB writes
// to complete. // to complete.
if (body_.handler_count_) if (body_.handlerCount_)
{ {
auto predicate = [&] { return !body_.handler_count_; }; auto predicate = [&] { return !body_.handlerCount_; };
body_.c_.wait(lock, predicate); body_.c_.wait(lock, predicate);
} }
} }

View File

@@ -45,7 +45,7 @@ DatabaseDownloader::getParser(
if (ec) if (ec)
{ {
p->get().body().close(); p->get().body().close();
fail(dstPath, complete, ec, "open"); fail(dstPath, complete, ec, "open", nullptr);
} }
return p; return p;
@@ -69,7 +69,7 @@ DatabaseDownloader::closeBody(std::shared_ptr<parser> p)
databaseBodyParser->get().body().close(); databaseBodyParser->get().body().close();
} }
uint64_t std::uint64_t
DatabaseDownloader::size(std::shared_ptr<parser> p) DatabaseDownloader::size(std::shared_ptr<parser> p)
{ {
using namespace boost::beast; using namespace boost::beast;

View File

@@ -25,12 +25,11 @@ namespace ripple {
SSLHTTPDownloader::SSLHTTPDownloader( SSLHTTPDownloader::SSLHTTPDownloader(
boost::asio::io_service& io_service, boost::asio::io_service& io_service,
beast::Journal j, beast::Journal j,
Config const& config, Config const& config)
bool isPaused)
: j_(j) : j_(j)
, ssl_ctx_(config, j, boost::asio::ssl::context::tlsv12_client) , ssl_ctx_(config, j, boost::asio::ssl::context::tlsv12_client)
, strand_(io_service) , strand_(io_service)
, isStopped_(false) , cancelDownloads_(false)
, sessionActive_(false) , sessionActive_(false)
{ {
} }
@@ -47,10 +46,19 @@ SSLHTTPDownloader::download(
if (!checkPath(dstPath)) if (!checkPath(dstPath))
return false; return false;
{
std::lock_guard lock(m_);
if (cancelDownloads_)
return true;
sessionActive_ = true;
}
if (!strand_.running_in_this_thread()) if (!strand_.running_in_this_thread())
strand_.post(std::bind( strand_.post(std::bind(
&SSLHTTPDownloader::download, &SSLHTTPDownloader::download,
this->shared_from_this(), this,
host, host,
port, port,
target, target,
@@ -62,7 +70,7 @@ SSLHTTPDownloader::download(
strand_, strand_,
std::bind( std::bind(
&SSLHTTPDownloader::do_session, &SSLHTTPDownloader::do_session,
this->shared_from_this(), this,
host, host,
port, port,
target, target,
@@ -78,7 +86,7 @@ SSLHTTPDownloader::onStop()
{ {
std::unique_lock lock(m_); std::unique_lock lock(m_);
isStopped_ = true; cancelDownloads_ = true;
if (sessionActive_) if (sessionActive_)
{ {
@@ -106,13 +114,57 @@ SSLHTTPDownloader::do_session(
////////////////////////////////////////////// //////////////////////////////////////////////
// Define lambdas for encapsulating download // Define lambdas for encapsulating download
// operations: // operations:
auto connect = [&](std::shared_ptr<parser> parser) { auto close = [&](auto p) {
uint64_t const rangeStart = size(parser); closeBody(p);
// Gracefully close the stream
stream_->async_shutdown(yield[ec]);
if (ec == boost::asio::error::eof)
ec.assign(0, ec.category());
if (ec)
{
// Most web servers don't bother with performing
// the SSL shutdown handshake, for speed.
JLOG(j_.trace()) << "async_shutdown: " << ec.message();
}
// The socket cannot be reused
stream_ = boost::none;
};
// When the downloader is being stopped
// because the server is shutting down,
// this method notifies a 'Stoppable'
// object that the session has ended.
auto exit = [this]() {
std::lock_guard<std::mutex> lock(m_);
sessionActive_ = false;
c_.notify_one();
};
auto failAndExit = [&exit, &dstPath, complete, &ec, this](
std::string const& errMsg, auto p) {
exit();
fail(dstPath, complete, ec, errMsg, p);
};
// end lambdas
////////////////////////////////////////////////////////////
if (cancelDownloads_.load())
return exit();
auto p = this->getParser(dstPath, complete, ec);
if (ec)
return failAndExit("getParser", p);
//////////////////////////////////////////////
// Prepare for download and establish the
// connection:
std::uint64_t const rangeStart = size(p);
ip::tcp::resolver resolver{strand_.context()}; ip::tcp::resolver resolver{strand_.context()};
auto const results = resolver.async_resolve(host, port, yield[ec]); auto const results = resolver.async_resolve(host, port, yield[ec]);
if (ec) if (ec)
return fail(dstPath, complete, ec, "async_resolve", parser); return failAndExit("async_resolve", p);
try try
{ {
@@ -120,30 +172,25 @@ SSLHTTPDownloader::do_session(
} }
catch (std::exception const& e) catch (std::exception const& e)
{ {
return fail( return failAndExit(std::string("exception: ") + e.what(), p);
dstPath,
complete,
ec,
std::string("exception: ") + e.what(),
parser);
} }
ec = ssl_ctx_.preConnectVerify(*stream_, host); ec = ssl_ctx_.preConnectVerify(*stream_, host);
if (ec) if (ec)
return fail(dstPath, complete, ec, "preConnectVerify", parser); return failAndExit("preConnectVerify", p);
boost::asio::async_connect( boost::asio::async_connect(
stream_->next_layer(), results.begin(), results.end(), yield[ec]); stream_->next_layer(), results.begin(), results.end(), yield[ec]);
if (ec) if (ec)
return fail(dstPath, complete, ec, "async_connect", parser); return failAndExit("async_connect", p);
ec = ssl_ctx_.postConnectVerify(*stream_, host); ec = ssl_ctx_.postConnectVerify(*stream_, host);
if (ec) if (ec)
return fail(dstPath, complete, ec, "postConnectVerify", parser); return failAndExit("postConnectVerify", p);
stream_->async_handshake(ssl::stream_base::client, yield[ec]); stream_->async_handshake(ssl::stream_base::client, yield[ec]);
if (ec) if (ec)
return fail(dstPath, complete, ec, "async_handshake", parser); return failAndExit("async_handshake", p);
// Set up an HTTP HEAD request message to find the file size // Set up an HTTP HEAD request message to find the file size
http::request<http::empty_body> req{http::verb::head, target, version}; http::request<http::empty_body> req{http::verb::head, target, version};
@@ -160,81 +207,62 @@ SSLHTTPDownloader::do_session(
http::async_write(*stream_, req, yield[ec]); http::async_write(*stream_, req, yield[ec]);
if (ec) if (ec)
return fail(dstPath, complete, ec, "async_write", parser); return failAndExit("async_write", p);
{ {
// Check if available storage for file size // Read the response
http::response_parser<http::empty_body> p; http::response_parser<http::empty_body> connectParser;
p.skip(true); connectParser.skip(true);
http::async_read(*stream_, read_buf_, p, yield[ec]); http::async_read(*stream_, read_buf_, connectParser, yield[ec]);
if (ec) if (ec)
return fail(dstPath, complete, ec, "async_read", parser); return failAndExit("async_read", p);
// Range request was rejected // Range request was rejected
if (p.get().result() == http::status::range_not_satisfiable) if (connectParser.get().result() == http::status::range_not_satisfiable)
{ {
req.erase(http::field::range); req.erase(http::field::range);
http::async_write(*stream_, req, yield[ec]); http::async_write(*stream_, req, yield[ec]);
if (ec) if (ec)
return fail( return failAndExit("async_write_range_verify", p);
dstPath,
complete,
ec,
"async_write_range_verify",
parser);
http::response_parser<http::empty_body> p; http::response_parser<http::empty_body> rangeParser;
p.skip(true); rangeParser.skip(true);
http::async_read(*stream_, read_buf_, p, yield[ec]); http::async_read(*stream_, read_buf_, rangeParser, yield[ec]);
if (ec) if (ec)
return fail( return failAndExit("async_read_range_verify", p);
dstPath,
complete,
ec,
"async_read_range_verify",
parser);
// The entire file is downloaded already. // The entire file is downloaded already.
if (p.content_length() == rangeStart) if (rangeParser.content_length() == rangeStart)
skip = true; skip = true;
else else
return fail( return failAndExit("range_not_satisfiable", p);
dstPath, complete, ec, "range_not_satisfiable", parser);
} }
else if ( else if (
rangeStart && p.get().result() != http::status::partial_content) rangeStart &&
connectParser.get().result() != http::status::partial_content)
{ {
ec.assign( ec.assign(
boost::system::errc::not_supported, boost::system::errc::not_supported,
boost::system::generic_category()); boost::system::generic_category());
return fail( return failAndExit("Range request ignored", p);
dstPath, complete, ec, "Range request ignored", parser);
} }
else if (auto len = p.content_length()) else if (auto len = connectParser.content_length())
{ {
try try
{ {
// Ensure sufficient space is available
if (*len > space(dstPath.parent_path()).available) if (*len > space(dstPath.parent_path()).available)
{ {
return fail( return failAndExit(
dstPath, "Insufficient disk space for download", p);
complete,
ec,
"Insufficient disk space for download",
parser);
} }
} }
catch (std::exception const& e) catch (std::exception const& e)
{ {
return fail( return failAndExit(std::string("exception: ") + e.what(), p);
dstPath,
complete,
ec,
std::string("exception: ") + e.what(),
parser);
} }
} }
} }
@@ -254,71 +282,18 @@ SSLHTTPDownloader::do_session(
http::async_write(*stream_, req, yield[ec]); http::async_write(*stream_, req, yield[ec]);
if (ec) if (ec)
return fail(dstPath, complete, ec, "async_write", parser); return failAndExit("async_write", p);
return true; // end prepare and connect
};
auto close = [&](auto p) {
closeBody(p);
// Gracefully close the stream
stream_->async_shutdown(yield[ec]);
if (ec == boost::asio::error::eof)
ec.assign(0, ec.category());
if (ec)
{
// Most web servers don't bother with performing
// the SSL shutdown handshake, for speed.
JLOG(j_.trace()) << "async_shutdown: " << ec.message();
}
// The socket cannot be reused
stream_ = boost::none;
};
auto getParser = [&] {
auto p = this->getParser(dstPath, complete, ec);
if (ec)
fail(dstPath, complete, ec, "getParser", p);
return p;
};
// When the downloader is being stopped
// because the server is shutting down,
// this method notifies a 'Stoppable'
// object that the session has ended.
auto exit = [this]() {
std::lock_guard<std::mutex> lock(m_);
sessionActive_ = false;
c_.notify_one();
};
// end lambdas
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
{
std::lock_guard<std::mutex> lock(m_);
sessionActive_ = true;
}
if (isStopped_.load())
return exit();
auto p = getParser();
if (ec)
return exit();
if (!connect(p) || ec)
return exit();
if (skip) if (skip)
p->skip(true); p->skip(true);
// Download the file // Download the file
while (!p->is_done()) while (!p->is_done())
{ {
if (isStopped_.load()) if (cancelDownloads_.load())
{ {
close(p); close(p);
return exit(); return exit();
@@ -336,7 +311,7 @@ SSLHTTPDownloader::do_session(
complete(std::move(dstPath)); complete(std::move(dstPath));
} }
bool void
SSLHTTPDownloader::fail( SSLHTTPDownloader::fail(
boost::filesystem::path dstPath, boost::filesystem::path dstPath,
std::function<void(boost::filesystem::path)> const& complete, std::function<void(boost::filesystem::path)> const& complete,
@@ -366,8 +341,6 @@ SSLHTTPDownloader::fail(
<< " in function: " << __func__; << " in function: " << __func__;
} }
complete(std::move(dstPath)); complete(std::move(dstPath));
return false;
} }
} // namespace ripple } // namespace ripple

View File

@@ -1,61 +0,0 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2020 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef RIPPLE_NODESTORE_RETRYFINALIZE_H_INCLUDED
#define RIPPLE_NODESTORE_RETRYFINALIZE_H_INCLUDED
#include <ripple/app/main/Application.h>
#include <functional>
namespace ripple {
namespace NodeStore {
// Schedules repeated attempts to finalize an imported shard once its
// last ledger hash (the "reference hash") can be retrieved from the
// network. An instance is held per pending shard (see
// ShardInfo::retryFinalize in DatabaseShardImp).
class RetryFinalize
{
public:
// Callback invoked on each retry attempt; receives the index of the
// shard whose finalization should be re-attempted.
using retryFunction = std::function<void(std::uint32_t shardIndex)>;
RetryFinalize() = default;
// Schedule another attempt to finalize the shard identified by
// shardIndex, invoking f after retryInterval_. Returns false once
// maxAttempts_ has been exhausted, at which point the caller treats
// the shard import as failed and removes the shard.
bool
retry(Application& app, retryFunction f, std::uint32_t shardIndex);
// Must match the imported shard's last ledger hash
uint256 referenceHash{0};
private:
using waitable_timer =
boost::asio::basic_waitable_timer<std::chrono::steady_clock>;
// Delay between successive retry attempts.
static constexpr std::chrono::seconds retryInterval_ =
std::chrono::seconds{60};
// Maximum attempts to retrieve a shard's last ledger hash
static constexpr uint32_t maxAttempts_{5};
// Timer driving the deferred retries; presumably constructed on the
// first call to retry() -- implementation lives in RetryFinalize.cpp.
std::unique_ptr<waitable_timer> timer_;
// Number of attempts to retrieve a shard's last ledger hash
std::uint32_t numAttempts_{0};
};
} // namespace NodeStore
} // namespace ripple
#endif // RIPPLE_NODESTORE_RETRYFINALIZE_H_INCLUDED

View File

@@ -2,129 +2,32 @@
## Overview ## Overview
In order to validate shards that have been downloaded from file servers (as opposed to shards acquired from peers), the application must confirm the validity of the downloaded shard's last ledger. The following sections describe this confirmation process in greater detail. In order to validate shards that have been downloaded from file servers (as opposed to shards acquired from peers), the application must confirm the validity of the downloaded shard's last ledger. So before initiating the download, we first confirm that we are able to retrieve the shard's last ledger hash. The following sections describe this confirmation process in greater detail.
## Execution Concept ## Hash Verification
### Flag Ledger ### Flag Ledger
Since the number of ledgers contained in each shard is always a multiple of 256, a shard's last ledger is always a flag ledger. Conveniently, the application provides a mechanism for retrieving the hash for a given flag ledger: Since the number of ledgers contained in each shard is always a multiple of 256, a shard's last ledger is always a flag ledger. Conveniently, the skip list stored within a ledger will provide us with a series of flag ledger hashes, enabling the software to corroborate a shard's last ledger hash. We access the skip list by calling `LedgerMaster::walkHashBySeq` and providing the sequence of a shard's last ledger:
```C++ ```C++
boost::optional<uint256> boost::optional<uint256> expectedHash;
hashOfSeq (ReadView const& ledger, expectedHash =
LedgerIndex seq, app_.getLedgerMaster().walkHashBySeq(lastLedgerSeq(shardIndex));
beast::Journal journal)
``` ```
When validating downloaded shards, we use this function to retrieve the hash of the shard's last ledger. If the function returns a hash that matches the hash stored in the shard, validation of the shard can proceed. When a user requests a shard download, the `ShardArchiveHandler` will first use this function to retrieve the hash of the shard's last ledger. If the function returns a hash, downloading the shard can proceed. Once the download completes, the server can reliably retrieve this last ledger hash to complete validation of the shard.
### Caveats ### Caveats
#### Later Ledger #### Later Ledger
The `getHashBySeq` function will provide the hash of a flag ledger only if the application has stored a later ledger. When validating a downloaded shard, if there is no later ledger stored, validation of the shard will be deferred until a later ledger has been stored. The `walkHashBySeq` function will provide the hash of a flag ledger only if the application has stored a later ledger. When verifying the last ledger hash of a pending shard download, if there is no later ledger stored, the download will be deferred until a later ledger has been stored.
We employ a simple heuristic for determining whether the application has stored a ledger later than the last ledger of the downloaded shard: We use the presence (or absence) of a validated ledger with a sequence number later than the sequence of the shard's last ledger as a heuristic for determining whether or not we should have the shard's last ledger hash. A later ledger must be present in order to reliably retrieve the hash of the shard's last ledger. The hash will only be retrieved when a later ledger is present. Otherwise verification of the shard will be deferred.
```C++
// We use the presence (or absence) of the validated
// ledger as a heuristic for determining whether or
// not we have stored a ledger that comes after the
// last ledger in this shard. A later ledger must
// be present in order to reliably retrieve the hash
// of the shard's last ledger.
if (app_.getLedgerMaster().getValidatedLedger())
{
auto const hash = app_.getLedgerMaster().getHashBySeq(
lastLedgerSeq(shardIndex));
.
.
.
}
```
The `getHashBySeq` function will be invoked only when a call to `LedgerMaster::getValidatedLedger` returns a validated ledger, rather than a `nullptr`. Otherwise validation of the shard will be deferred.
### Retries ### Retries
#### Retry Limit #### Retry Limit
If the server must defer shard validation, the software will initiate a timer that upon expiration, will re-attempt confirming the last ledger hash. We place an upper limit on the number of attempts the server makes to achieve this confirmation. When the maximum number of attempts has been reached, validation of the shard will fail, resulting in the removal of the shard. An attempt counts toward the limit only when we are able to get a validated ledger (implying a current view of the network), but are unable to retrieve the last ledger hash. Retries that occur because no validated ledger was available are not counted. If the server must defer hash verification, the software will initiate a timer that upon expiration, will re-attempt verifying the last ledger hash. We place an upper limit on the number of attempts the server makes to achieve this verification. When the maximum number of attempts has been reached, the download request will fail, and the `ShardArchiveHandler` will proceed with any remaining downloads. An attempt counts toward the limit only when we are able to get a later validated ledger (implying a current view of the network), but are unable to retrieve the last ledger hash. Retries that occur because no validated ledger was available are not counted.
#### ShardInfo
The `DatabaseShardImp` class stores a container of `ShardInfo` structs, each of which contains information pertaining to a shard held by the server. These structs will be used during shard import to store the last ledger hash (when available) and to track the number of hash confirmation attempts that have been made.
```C++
struct ShardInfo
{
.
.
.
// Used to limit the number of times we attempt
// to retrieve a shard's last ledger hash, when
// the hash should have been found. See
// scheduleFinalizeShard(). Once this limit has
// been exceeded, the shard has failed and is
// removed.
bool
attemptHashConfirmation()
{
if (lastLedgerHashAttempts + 1 <= maxLastLedgerHashAttempts)
{
++lastLedgerHashAttempts;
return true;
}
return false;
}
// This variable is used during the validation
// of imported shards and must match the
// imported shard's last ledger hash.
uint256 lastLedgerHash;
// The number of times we've attempted to
// confirm this shard's last ledger hash.
uint16_t lastLedgerHashAttempts;
// The upper limit on attempts to confirm
// the shard's last ledger hash.
static const uint8_t maxLastLedgerHashAttempts = 5;
};
```
### Shard Import
Once a shard has been successfully downloaded by the `ShardArchiveHandler`, this class invokes the `importShard` method on the shard database:
```C++
bool
DatabaseShardImp::importShard(
std::uint32_t shardIndex,
boost::filesystem::path const& srcDir)
```
At the end of this method, `DatabaseShardImp::finalizeShard` is invoked which begins validation of the downloaded shard. This will be changed so that instead, the software first creates a task to confirm the last ledger hash. Upon the successful completion of this task, shard validation will begin.
```C++
bool
DatabaseShardImp::importShard(
std::uint32_t shardIndex,
boost::filesystem::path const& srcDir)
{
.
.
.
taskQueue_->addTask([this]()
{
// Verify hash.
// Invoke DatabaseShardImp::finalizeShard on success.
// Defer task if necessary.
});
}
```

View File

@@ -156,7 +156,8 @@ DatabaseShardImp::init()
auto const result{shards_.emplace( auto const result{shards_.emplace(
shardIndex, shardIndex,
ShardInfo(std::move(shard), ShardInfo::State::none))}; ShardInfo(std::move(shard), ShardInfo::State::none))};
finalizeShard(result.first->second, true, lock); finalizeShard(
result.first->second, true, lock, boost::none);
} }
else else
{ {
@@ -355,6 +356,16 @@ DatabaseShardImp::importShard(
return false; return false;
} }
auto expectedHash =
app_.getLedgerMaster().walkHashBySeq(lastLedgerSeq(shardIndex));
if (!expectedHash)
{
JLOG(j_.error()) << "shard " << shardIndex
<< " expected hash not found";
return false;
}
auto renameDir = [&](path const& src, path const& dst) { auto renameDir = [&](path const& src, path const& dst) {
try try
{ {
@@ -376,8 +387,7 @@ DatabaseShardImp::importShard(
// Check shard is prepared // Check shard is prepared
if (auto const it{shards_.find(shardIndex)}; it == shards_.end() || if (auto const it{shards_.find(shardIndex)}; it == shards_.end() ||
it->second.shard || it->second.state != ShardInfo::State::import || it->second.shard || it->second.state != ShardInfo::State::import)
it->second.retryFinalize)
{ {
JLOG(j_.error()) << "shard " << shardIndex << " failed to import"; JLOG(j_.error()) << "shard " << shardIndex << " failed to import";
return false; return false;
@@ -404,8 +414,7 @@ DatabaseShardImp::importShard(
std::lock_guard lock(mutex_); std::lock_guard lock(mutex_);
auto const it{shards_.find(shardIndex)}; auto const it{shards_.find(shardIndex)};
if (it == shards_.end() || it->second.shard || if (it == shards_.end() || it->second.shard ||
it->second.state != ShardInfo::State::import || it->second.state != ShardInfo::State::import)
it->second.retryFinalize)
{ {
JLOG(j_.error()) << "shard " << shardIndex << " failed to import"; JLOG(j_.error()) << "shard " << shardIndex << " failed to import";
shard.reset(); shard.reset();
@@ -414,10 +423,9 @@ DatabaseShardImp::importShard(
} }
it->second.shard = std::move(shard); it->second.shard = std::move(shard);
it->second.retryFinalize = std::make_unique<RetryFinalize>(); finalizeShard(it->second, true, lock, expectedHash);
} }
finalizeWithRefHash(shardIndex);
return true; return true;
} }
@@ -611,9 +619,6 @@ DatabaseShardImp::onStop()
{ {
if (e.second.shard) if (e.second.shard)
e.second.shard->stop(); e.second.shard->stop();
if (e.second.retryFinalize)
e.second.retryFinalize.reset();
} }
shards_.clear(); shards_.clear();
} }
@@ -813,7 +818,8 @@ DatabaseShardImp::import(Database& source)
auto const result{shards_.emplace( auto const result{shards_.emplace(
shardIndex, shardIndex,
ShardInfo(std::move(shard), ShardInfo::State::none))}; ShardInfo(std::move(shard), ShardInfo::State::none))};
finalizeShard(result.first->second, true, lock); finalizeShard(
result.first->second, true, lock, boost::none);
} }
catch (std::exception const& e) catch (std::exception const& e)
{ {
@@ -1185,94 +1191,12 @@ DatabaseShardImp::findAcquireIndex(
return boost::none; return boost::none;
} }
// Attempt to retrieve the reference (last ledger) hash for an imported
// shard and, if found, begin finalizing the shard. If the hash cannot
// yet be retrieved, schedule a retry via RetryFinalize; after the
// maximum number of attempts the shard is treated as failed and removed.
// May be re-invoked by the retry timer with this shard's index.
void
DatabaseShardImp::finalizeWithRefHash(std::uint32_t shardIndex)
{
// We use the presence (or absence) of the validated
// ledger as a heuristic for determining whether or
// not we have stored a ledger that comes after the
// last ledger in this shard. A later ledger must
// be present in order to reliably retrieve the hash
// of the shard's last ledger.
boost::optional<uint256> referenceHash;
if (app_.getLedgerMaster().getValidatedLedger())
{
referenceHash =
app_.getLedgerMaster().walkHashBySeq(lastLedgerSeq(shardIndex));
}
// Make sure the shard was found in an
// expected state. Caller must hold mutex_ (the lock_guard parameter
// documents that requirement).
auto confirmShard = [this, shardIndex](
auto const it, std::lock_guard<std::mutex>&) {
if (it == shards_.end() ||
it->second.state != ShardInfo::State::import ||
!it->second.retryFinalize)
{
JLOG(j_.error()) << "shard " << shardIndex << " failed to import";
return false;
}
return true;
};
// The node is shutting down; remove the shard
// and return. The shard pointer is copied out under the lock so that
// removeFailedShard can be called without holding mutex_.
if (isStopping())
{
std::shared_ptr<Shard> shard;
{
std::lock_guard lock(mutex_);
auto const it{shards_.find(shardIndex)};
if(!confirmShard(it, lock))
return;
shard = it->second.shard;
}
JLOG(j_.warn())
<< "shard " << shardIndex
<< " will not be imported due to system shutdown, removing";
removeFailedShard(shard);
return;
}
std::lock_guard lock(mutex_);
auto const it{shards_.find(shardIndex)};
if(!confirmShard(it, lock))
return;
// A usable reference hash was found: record it and queue the shard
// for finalization (finalizeShard requires mutex_ to be held).
if (referenceHash && referenceHash->isNonZero())
{
it->second.retryFinalize->referenceHash = *referenceHash;
finalizeShard(it->second, true, lock);
return;
}
// Failed to find a reference hash, schedule to try again
if (!it->second.retryFinalize->retry(
app_,
std::bind(
&DatabaseShardImp::finalizeWithRefHash,
this,
std::placeholders::_1),
shardIndex))
{
JLOG(j_.error()) << "shard " << shardIndex
<< " failed to import, maximum attempts reached";
removeFailedShard(it->second.shard);
}
}
void void
DatabaseShardImp::finalizeShard( DatabaseShardImp::finalizeShard(
ShardInfo& shardInfo, ShardInfo& shardInfo,
bool writeSQLite, bool writeSQLite,
std::lock_guard<std::mutex>&) std::lock_guard<std::mutex>&,
boost::optional<uint256> const& expectedHash)
{ {
assert(shardInfo.shard); assert(shardInfo.shard);
assert(shardInfo.shard->index() != acquireIndex_); assert(shardInfo.shard->index() != acquireIndex_);
@@ -1282,19 +1206,16 @@ DatabaseShardImp::finalizeShard(
auto const shardIndex{shardInfo.shard->index()}; auto const shardIndex{shardInfo.shard->index()};
shardInfo.state = ShardInfo::State::finalize; shardInfo.state = ShardInfo::State::finalize;
taskQueue_->addTask([this, shardIndex, writeSQLite]() { taskQueue_->addTask([this, shardIndex, writeSQLite, expectedHash]() {
if (isStopping()) if (isStopping())
return; return;
std::shared_ptr<Shard> shard; std::shared_ptr<Shard> shard;
boost::optional<uint256> referenceHash;
{ {
std::lock_guard lock(mutex_); std::lock_guard lock(mutex_);
if (auto const it {shards_.find(shardIndex)}; it != shards_.end()) if (auto const it{shards_.find(shardIndex)}; it != shards_.end())
{ {
shard = it->second.shard; shard = it->second.shard;
if (it->second.retryFinalize)
*referenceHash = it->second.retryFinalize->referenceHash;
} }
else else
{ {
@@ -1303,7 +1224,7 @@ DatabaseShardImp::finalizeShard(
} }
} }
if (!shard->finalize(writeSQLite, referenceHash)) if (!shard->finalize(writeSQLite, expectedHash))
{ {
if (isStopping()) if (isStopping())
return; return;
@@ -1466,7 +1387,7 @@ DatabaseShardImp::storeLedgerInShard(
acquireIndex_ = 0; acquireIndex_ = 0;
if (it->second.state != ShardInfo::State::finalize) if (it->second.state != ShardInfo::State::finalize)
finalizeShard(it->second, false, lock); finalizeShard(it->second, false, lock, boost::none);
} }
else else
{ {
@@ -1480,17 +1401,15 @@ DatabaseShardImp::storeLedgerInShard(
} }
void void
DatabaseShardImp::removeFailedShard( DatabaseShardImp::removeFailedShard(std::shared_ptr<Shard> shard)
std::shared_ptr<Shard> shard)
{ {
{ {
std::lock_guard lock(mutex_); std::lock_guard lock(mutex_);
shards_.erase(shard->index());
if (shard->index() == acquireIndex_) if (shard->index() == acquireIndex_)
acquireIndex_ = 0; acquireIndex_ = 0;
if(shard->isFinal()) if ((shards_.erase(shard->index()) > 0) && shard->isFinal())
updateStatus(lock); updateStatus(lock);
} }

View File

@@ -23,7 +23,6 @@
#include <ripple/nodestore/DatabaseShard.h> #include <ripple/nodestore/DatabaseShard.h>
#include <ripple/nodestore/impl/Shard.h> #include <ripple/nodestore/impl/Shard.h>
#include <ripple/nodestore/impl/TaskQueue.h> #include <ripple/nodestore/impl/TaskQueue.h>
#include <ripple/nodestore/RetryFinalize.h>
#include <boost/asio/basic_waitable_timer.hpp> #include <boost/asio/basic_waitable_timer.hpp>
@@ -192,9 +191,6 @@ private:
std::shared_ptr<Shard> shard; std::shared_ptr<Shard> shard;
State state{State::none}; State state{State::none};
// Used during the validation of imported shards
std::unique_ptr<RetryFinalize> retryFinalize;
}; };
Application& app_; Application& app_;
@@ -267,12 +263,6 @@ private:
std::uint32_t validLedgerSeq, std::uint32_t validLedgerSeq,
std::lock_guard<std::mutex>&); std::lock_guard<std::mutex>&);
public:
// Attempts to retrieve a reference last ledger hash
// for a shard and finalize it
void
finalizeWithRefHash(std::uint32_t shardIndex);
private: private:
// Queue a task to finalize a shard by validating its databases // Queue a task to finalize a shard by validating its databases
// Lock must be held // Lock must be held
@@ -280,7 +270,8 @@ private:
finalizeShard( finalizeShard(
ShardInfo& shardInfo, ShardInfo& shardInfo,
bool writeSQLite, bool writeSQLite,
std::lock_guard<std::mutex>&); std::lock_guard<std::mutex>&,
boost::optional<uint256> const& expectedHash);
// Set storage and file descriptor usage stats // Set storage and file descriptor usage stats
// Lock must NOT be held // Lock must NOT be held

View File

@@ -396,7 +396,7 @@ Shard::isLegacy() const
bool bool
Shard::finalize( Shard::finalize(
bool const writeSQLite, bool const writeSQLite,
boost::optional<uint256> const& referenceHash) boost::optional<uint256> const& expectedHash)
{ {
assert(backend_); assert(backend_);
@@ -502,7 +502,7 @@ Shard::finalize(
// Validate the last ledger hash of a downloaded shard // Validate the last ledger hash of a downloaded shard
// using a ledger hash obtained from the peer network // using a ledger hash obtained from the peer network
if (referenceHash && *referenceHash != hash) if (expectedHash && *expectedHash != hash)
return fail("invalid last ledger hash"); return fail("invalid last ledger hash");
// Validate every ledger stored in the backend // Validate every ledger stored in the backend

View File

@@ -24,6 +24,7 @@
#include <ripple/basics/BasicConfig.h> #include <ripple/basics/BasicConfig.h>
#include <ripple/basics/StringUtilities.h> #include <ripple/basics/StringUtilities.h>
#include <ripple/net/DatabaseDownloader.h> #include <ripple/net/DatabaseDownloader.h>
#include <ripple/rpc/ShardVerificationScheduler.h>
#include <boost/asio/basic_waitable_timer.hpp> #include <boost/asio/basic_waitable_timer.hpp>
#include <boost/filesystem.hpp> #include <boost/filesystem.hpp>
@@ -34,38 +35,33 @@ class ShardArchiveHandler_test;
} }
namespace RPC { namespace RPC {
/** Handles the download and import one or more shard archives. */ /** Handles the download and import of one or more shard archives. */
class ShardArchiveHandler class ShardArchiveHandler : public Stoppable
: public Stoppable,
public std::enable_shared_from_this<ShardArchiveHandler>
{ {
public: public:
using pointer = std::shared_ptr<ShardArchiveHandler>; using TimerOpCounter =
ClosureCounter<void, boost::system::error_code const&>;
friend class test::ShardArchiveHandler_test; friend class test::ShardArchiveHandler_test;
static boost::filesystem::path static boost::filesystem::path
getDownloadDirectory(Config const& config); getDownloadDirectory(Config const& config);
static pointer static std::unique_ptr<ShardArchiveHandler>
getInstance(); makeShardArchiveHandler(Application& app, Stoppable& parent);
static pointer // Create a ShardArchiveHandler only if
getInstance(Application& app, Stoppable& parent); // the state database is present, indicating
// that recovery is needed.
static std::unique_ptr<ShardArchiveHandler>
tryMakeRecoveryHandler(Application& app, Stoppable& parent);
static pointer ShardArchiveHandler(Application& app, Stoppable& parent);
recoverInstance(Application& app, Stoppable& parent);
static bool virtual ~ShardArchiveHandler() = default;
hasInstance();
bool [[nodiscard]] bool
init(); init();
bool
initFromDB();
~ShardArchiveHandler() = default;
bool bool
add(std::uint32_t shardIndex, std::pair<parsedURL, std::string>&& url); add(std::uint32_t shardIndex, std::pair<parsedURL, std::string>&& url);
@@ -84,10 +80,8 @@ private:
ShardArchiveHandler& ShardArchiveHandler&
operator=(ShardArchiveHandler const&) = delete; operator=(ShardArchiveHandler const&) = delete;
ShardArchiveHandler( [[nodiscard]] bool
Application& app, initFromDB(std::lock_guard<std::mutex> const&);
Stoppable& parent,
bool recovery = false);
void void
onStop() override; onStop() override;
@@ -105,7 +99,7 @@ private:
// Begins the download and import of the next archive. // Begins the download and import of the next archive.
bool bool
next(std::lock_guard<std::mutex>& l); next(std::lock_guard<std::mutex> const& l);
// Callback used by the downloader to notify completion of a download. // Callback used by the downloader to notify completion of a download.
void void
@@ -117,23 +111,57 @@ private:
// Remove the archive being processed. // Remove the archive being processed.
void void
remove(std::lock_guard<std::mutex>&); remove(std::lock_guard<std::mutex> const&);
void void
doRelease(std::lock_guard<std::mutex> const&); doRelease(std::lock_guard<std::mutex> const&);
static std::mutex instance_mutex_; bool
static pointer instance_; onClosureFailed(
std::string const& errorMsg,
std::lock_guard<std::mutex> const& lock);
bool
removeAndProceed(std::lock_guard<std::mutex> const& lock);
/////////////////////////////////////////////////
// m_ is used to protect access to downloader_,
// archives_, process_ and to protect setting and
// destroying sqliteDB_.
/////////////////////////////////////////////////
std::mutex mutable m_; std::mutex mutable m_;
std::unique_ptr<DatabaseDownloader> downloader_;
std::map<std::uint32_t, parsedURL> archives_;
bool process_;
std::unique_ptr<DatabaseCon> sqliteDB_;
/////////////////////////////////////////////////
Application& app_; Application& app_;
beast::Journal const j_; beast::Journal const j_;
std::unique_ptr<DatabaseCon> sqliteDB_;
std::shared_ptr<DatabaseDownloader> downloader_;
boost::filesystem::path const downloadDir_; boost::filesystem::path const downloadDir_;
boost::asio::basic_waitable_timer<std::chrono::steady_clock> timer_; boost::asio::basic_waitable_timer<std::chrono::steady_clock> timer_;
bool process_; JobCounter jobCounter_;
std::map<std::uint32_t, parsedURL> archives_; TimerOpCounter timerCounter_;
ShardVerificationScheduler verificationScheduler_;
};
////////////////////////////////////////////////////////////////////
// The RecoveryHandler is an empty class that is constructed by
// the application when the ShardArchiveHandler's state database
// is present at application start, indicating that the handler
// needs to perform recovery. However, if recovery isn't needed
// at application start, and the user subsequently submits a request
// to download shards, we construct a ShardArchiveHandler rather
// than a RecoveryHandler to process the request. With this approach,
// type verification can be employed to determine whether the
// ShardArchiveHandler was constructed in recovery mode by the
// application, or as a response to a user submitting a request to
// download shards.
////////////////////////////////////////////////////////////////////
class RecoveryHandler : public ShardArchiveHandler
{
public:
RecoveryHandler(Application& app, Stoppable& parent);
}; };
} // namespace RPC } // namespace RPC

View File

@@ -0,0 +1,84 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2020 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef RIPPLE_RPC_SHARDVERIFICATIONSCHEDULER_H_INCLUDED
#define RIPPLE_RPC_SHARDVERIFICATIONSCHEDULER_H_INCLUDED
#include <ripple/app/main/Application.h>
#include <functional>
namespace ripple {
namespace RPC {
// Schedules deferred retries of shard last-ledger-hash verification on
// behalf of the ShardArchiveHandler. Attempts are rate limited by
// retryInterval_ and capped at maxAttempts_ (attempts only count when a
// later validated ledger should have made the hash available).
class ShardVerificationScheduler
{
public:
// This is the signature of the function that the client
// wants to have invoked upon timer expiration. The function
// should check the error code 'ec' and abort the function
// if the timer was cancelled:
// (ec == boost::asio::error::operation_aborted).
// In the body of the function, the client should perform
// the necessary verification.
using retryFunction =
std::function<void(boost::system::error_code const& ec)>;
// Default-constructed scheduler uses defaultRetryInterval_ and
// defaultmaxAttempts_.
ShardVerificationScheduler() = default;
// Construct with a custom retry interval and attempt limit.
ShardVerificationScheduler(
std::chrono::seconds retryInterval,
std::uint32_t maxAttempts);
// Schedule f to run after retryInterval_. When shouldHaveHash is
// true the attempt counts toward maxAttempts_; returns false once
// the limit has been reached.
bool
retry(Application& app, bool shouldHaveHash, retryFunction f);
// Clear the attempt counter so the scheduler can be reused for the
// next shard.
void
reset();
private:
using waitable_timer =
boost::asio::basic_waitable_timer<std::chrono::steady_clock>;
/////////////////////////////////////////////////////
// NOTE: retryInterval_ and maxAttempts_ were chosen
// semi-arbitrarily and experimenting with other
// values might prove useful.
/////////////////////////////////////////////////////
static constexpr std::chrono::seconds defaultRetryInterval_{60};
// NOTE(review): spelling is inconsistent with defaultRetryInterval_
// (lowercase 'm'); kept as-is since it is referenced below.
static constexpr std::uint32_t defaultmaxAttempts_{5};
// The number of seconds to wait before retrying
// retrieval of a shard's last ledger hash
const std::chrono::seconds retryInterval_{defaultRetryInterval_};
// Maximum attempts to retrieve a shard's last ledger hash
const std::uint32_t maxAttempts_{defaultmaxAttempts_};
std::unique_ptr<waitable_timer> timer_;
// Number of attempts to retrieve a shard's last ledger hash
std::uint32_t numAttempts_{0};
};
} // namespace RPC
} // namespace ripple
#endif // RIPPLE_RPC_SHARDVERIFICATIONSCHEDULER_H_INCLUDED

View File

@@ -60,7 +60,7 @@ doDownloadShard(RPC::JsonContext& context)
auto preShards{shardStore->getPreShards()}; auto preShards{shardStore->getPreShards()};
if (!preShards.empty()) if (!preShards.empty())
{ {
std::string s{"Download in progress. Shard"}; std::string s{"Download already in progress. Shard"};
if (!std::all_of(preShards.begin(), preShards.end(), ::isdigit)) if (!std::all_of(preShards.begin(), preShards.end(), ::isdigit))
s += "s"; s += "s";
return RPC::makeObjectValue(s + " " + preShards); return RPC::makeObjectValue(s + " " + preShards);
@@ -129,22 +129,15 @@ doDownloadShard(RPC::JsonContext& context)
} }
} }
RPC::ShardArchiveHandler::pointer handler; RPC::ShardArchiveHandler* handler = nullptr;
try try
{ {
handler = RPC::ShardArchiveHandler::hasInstance() handler = context.app.getShardArchiveHandler();
? RPC::ShardArchiveHandler::getInstance()
: RPC::ShardArchiveHandler::getInstance(
context.app, context.app.getJobQueue());
if (!handler) if (!handler)
return RPC::make_error( return RPC::make_error(
rpcINTERNAL, "Failed to create ShardArchiveHandler."); rpcINTERNAL, "Failed to create ShardArchiveHandler.");
if (!handler->init())
return RPC::make_error(
rpcINTERNAL, "Failed to initiate ShardArchiveHandler.");
} }
catch (std::exception const& e) catch (std::exception const& e)
{ {

View File

@@ -22,7 +22,6 @@
#include <ripple/basics/BasicConfig.h> #include <ripple/basics/BasicConfig.h>
#include <ripple/core/ConfigSections.h> #include <ripple/core/ConfigSections.h>
#include <ripple/nodestore/DatabaseShard.h> #include <ripple/nodestore/DatabaseShard.h>
#include <ripple/protocol/ErrorCodes.h>
#include <ripple/rpc/ShardArchiveHandler.h> #include <ripple/rpc/ShardArchiveHandler.h>
#include <ripple/rpc/impl/Handler.h> #include <ripple/rpc/impl/Handler.h>
@@ -34,9 +33,6 @@ namespace RPC {
using namespace boost::filesystem; using namespace boost::filesystem;
using namespace std::chrono_literals; using namespace std::chrono_literals;
std::mutex ShardArchiveHandler::instance_mutex_;
ShardArchiveHandler::pointer ShardArchiveHandler::instance_ = nullptr;
boost::filesystem::path boost::filesystem::path
ShardArchiveHandler::getDownloadDirectory(Config const& config) ShardArchiveHandler::getDownloadDirectory(Config const& config)
{ {
@@ -48,66 +44,73 @@ ShardArchiveHandler::getDownloadDirectory(Config const& config)
"download"; "download";
} }
auto std::unique_ptr<ShardArchiveHandler>
ShardArchiveHandler::getInstance() -> pointer ShardArchiveHandler::makeShardArchiveHandler(
{
std::lock_guard lock(instance_mutex_);
return instance_;
}
auto
ShardArchiveHandler::getInstance(Application& app, Stoppable& parent) -> pointer
{
std::lock_guard lock(instance_mutex_);
assert(!instance_);
instance_.reset(new ShardArchiveHandler(app, parent));
return instance_;
}
auto
ShardArchiveHandler::recoverInstance(Application& app, Stoppable& parent)
-> pointer
{
std::lock_guard lock(instance_mutex_);
assert(!instance_);
instance_.reset(new ShardArchiveHandler(app, parent, true));
return instance_;
}
bool
ShardArchiveHandler::hasInstance()
{
std::lock_guard lock(instance_mutex_);
return instance_.get() != nullptr;
}
ShardArchiveHandler::ShardArchiveHandler(
Application& app, Application& app,
Stoppable& parent, Stoppable& parent)
bool recovery) {
return std::make_unique<ShardArchiveHandler>(app, parent);
}
std::unique_ptr<ShardArchiveHandler>
ShardArchiveHandler::tryMakeRecoveryHandler(Application& app, Stoppable& parent)
{
auto const downloadDir(getDownloadDirectory(app.config()));
// Create the handler iff the database
// is present.
if (exists(downloadDir / stateDBName) &&
is_regular_file(downloadDir / stateDBName))
{
return std::make_unique<RecoveryHandler>(app, parent);
}
return nullptr;
}
ShardArchiveHandler::ShardArchiveHandler(Application& app, Stoppable& parent)
: Stoppable("ShardArchiveHandler", parent) : Stoppable("ShardArchiveHandler", parent)
, process_(false)
, app_(app) , app_(app)
, j_(app.journal("ShardArchiveHandler")) , j_(app.journal("ShardArchiveHandler"))
, downloadDir_(getDownloadDirectory(app.config())) , downloadDir_(getDownloadDirectory(app.config()))
, timer_(app_.getIOService()) , timer_(app_.getIOService())
, process_(false) , verificationScheduler_(
std::chrono::seconds(get<std::uint32_t>(
app.config().section(ConfigSection::shardDatabase()),
"shard_verification_retry_interval")),
get<std::uint32_t>(
app.config().section(ConfigSection::shardDatabase()),
"shard_verification_max_attempts"))
{ {
assert(app_.getShardStore()); assert(app_.getShardStore());
if (recovery)
downloader_.reset(
new DatabaseDownloader(app_.getIOService(), j_, app_.config()));
} }
bool bool
ShardArchiveHandler::init() ShardArchiveHandler::init()
{ {
std::lock_guard lock(m_);
if (process_ || downloader_ != nullptr || sqliteDB_ != nullptr)
{
JLOG(j_.warn()) << "Archives already being processed";
return false;
}
// Initialize from pre-existing database
if (exists(downloadDir_ / stateDBName) &&
is_regular_file(downloadDir_ / stateDBName))
{
downloader_.reset(
new DatabaseDownloader(app_.getIOService(), j_, app_.config()));
return initFromDB(lock);
}
// Fresh initialization
else
{
try try
{ {
create_directories(downloadDir_); create_directories(downloadDir_);
@@ -120,17 +123,18 @@ ShardArchiveHandler::init()
} }
catch (std::exception const& e) catch (std::exception const& e)
{ {
JLOG(j_.error()) << "exception: " << e.what() JLOG(j_.error())
<< " in function: " << __func__; << "exception: " << e.what() << " in function: " << __func__;
return false; return false;
} }
}
return true; return true;
} }
bool bool
ShardArchiveHandler::initFromDB() ShardArchiveHandler::initFromDB(std::lock_guard<std::mutex> const& lock)
{ {
try try
{ {
@@ -151,8 +155,6 @@ ShardArchiveHandler::initFromDB()
soci::rowset<soci::row> rs = soci::rowset<soci::row> rs =
(session.prepare << "SELECT * FROM State;"); (session.prepare << "SELECT * FROM State;");
std::lock_guard<std::mutex> lock(m_);
for (auto it = rs.begin(); it != rs.end(); ++it) for (auto it = rs.begin(); it != rs.end(); ++it)
{ {
parsedURL url; parsedURL url;
@@ -190,6 +192,7 @@ ShardArchiveHandler::initFromDB()
void void
ShardArchiveHandler::onStop() ShardArchiveHandler::onStop()
{ {
{
std::lock_guard<std::mutex> lock(m_); std::lock_guard<std::mutex> lock(m_);
if (downloader_) if (downloader_)
@@ -198,6 +201,15 @@ ShardArchiveHandler::onStop()
downloader_.reset(); downloader_.reset();
} }
timer_.cancel();
}
jobCounter_.join(
"ShardArchiveHandler", std::chrono::milliseconds(2000), j_);
timerCounter_.join(
"ShardArchiveHandler", std::chrono::milliseconds(2000), j_);
stopped(); stopped();
} }
@@ -271,7 +283,7 @@ ShardArchiveHandler::start()
if (!downloader_) if (!downloader_)
{ {
// will throw if can't initialize ssl context // will throw if can't initialize ssl context
downloader_ = std::make_shared<DatabaseDownloader>( downloader_ = std::make_unique<DatabaseDownloader>(
app_.getIOService(), j_, app_.config()); app_.getIOService(), j_, app_.config());
} }
} }
@@ -281,6 +293,7 @@ ShardArchiveHandler::start()
return false; return false;
} }
process_ = true;
return next(lock); return next(lock);
} }
@@ -292,7 +305,7 @@ ShardArchiveHandler::release()
} }
bool bool
ShardArchiveHandler::next(std::lock_guard<std::mutex>& l) ShardArchiveHandler::next(std::lock_guard<std::mutex> const& l)
{ {
if (archives_.empty()) if (archives_.empty())
{ {
@@ -300,8 +313,51 @@ ShardArchiveHandler::next(std::lock_guard<std::mutex>& l)
return false; return false;
} }
// Create a temp archive directory at the root if (isStopping())
return false;
auto const shardIndex{archives_.begin()->first}; auto const shardIndex{archives_.begin()->first};
// We use the sequence of the last validated ledger
// to determine whether or not we have stored a ledger
// that comes after the last ledger in this shard. A
// later ledger must be present in order to reliably
// retrieve the hash of the shard's last ledger.
boost::optional<uint256> expectedHash;
bool shouldHaveHash = false;
if (auto const seq = app_.getShardStore()->lastLedgerSeq(shardIndex);
(shouldHaveHash = app_.getLedgerMaster().getValidLedgerIndex() > seq))
{
expectedHash = app_.getLedgerMaster().walkHashBySeq(seq);
}
if (!expectedHash)
{
auto wrapper =
timerCounter_.wrap([this](boost::system::error_code const& ec) {
if (ec != boost::asio::error::operation_aborted)
{
std::lock_guard lock(m_);
this->next(lock);
}
});
if (!wrapper)
return onClosureFailed(
"failed to wrap closure for last ledger confirmation timer", l);
if (!verificationScheduler_.retry(app_, shouldHaveHash, *wrapper))
{
JLOG(j_.error()) << "failed to find last ledger hash for shard "
<< shardIndex << ", maximum attempts reached";
return removeAndProceed(l);
}
return true;
}
// Create a temp archive directory at the root
auto const dstDir{downloadDir_ / std::to_string(shardIndex)}; auto const dstDir{downloadDir_ / std::to_string(shardIndex)};
try try
{ {
@@ -310,42 +366,42 @@ ShardArchiveHandler::next(std::lock_guard<std::mutex>& l)
catch (std::exception const& e) catch (std::exception const& e)
{ {
JLOG(j_.error()) << "exception: " << e.what(); JLOG(j_.error()) << "exception: " << e.what();
remove(l); return removeAndProceed(l);
return next(l);
} }
// Download the archive. Process in another thread // Download the archive. Process in another thread
// to prevent holding up the lock if the downloader // to prevent holding up the lock if the downloader
// sleeps. // sleeps.
auto const& url{archives_.begin()->second}; auto const& url{archives_.begin()->second};
app_.getJobQueue().addJob( auto wrapper = jobCounter_.wrap([this, url, dstDir](Job&) {
jtCLIENT,
"ShardArchiveHandler",
[this, ptr = shared_from_this(), url, dstDir](Job&) {
if (!downloader_->download( if (!downloader_->download(
url.domain, url.domain,
std::to_string(url.port.get_value_or(443)), std::to_string(url.port.get_value_or(443)),
url.path, url.path,
11, 11,
dstDir / "archive.tar.lz4", dstDir / "archive.tar.lz4",
std::bind( [this](path dstPath) { complete(dstPath); }))
&ShardArchiveHandler::complete,
ptr,
std::placeholders::_1)))
{ {
std::lock_guard<std::mutex> l(m_); std::lock_guard<std::mutex> l(m_);
remove(l); removeAndProceed(l);
next(l);
} }
}); });
process_ = true; if (!wrapper)
return onClosureFailed(
"failed to wrap closure for starting download", l);
app_.getJobQueue().addJob(jtCLIENT, "ShardArchiveHandler", *wrapper);
return true; return true;
} }
void void
ShardArchiveHandler::complete(path dstPath) ShardArchiveHandler::complete(path dstPath)
{ {
if (isStopping())
return;
{ {
std::lock_guard lock(m_); std::lock_guard lock(m_);
try try
@@ -354,29 +410,26 @@ ShardArchiveHandler::complete(path dstPath)
{ {
auto ar{archives_.begin()}; auto ar{archives_.begin()};
JLOG(j_.error()) JLOG(j_.error())
<< "Downloading shard id " << ar->first << " form URL " << "Downloading shard id " << ar->first << " from URL "
<< ar->second.domain << ar->second.path; << ar->second.domain << ar->second.path;
remove(lock); removeAndProceed(lock);
next(lock);
return; return;
} }
} }
catch (std::exception const& e) catch (std::exception const& e)
{ {
JLOG(j_.error()) << "exception: " << e.what(); JLOG(j_.error()) << "exception: " << e.what();
remove(lock); removeAndProceed(lock);
next(lock);
return; return;
} }
} }
// Process in another thread to not hold up the IO service auto wrapper = jobCounter_.wrap([=, dstPath = std::move(dstPath)](Job&) {
app_.getJobQueue().addJob( if (isStopping())
jtCLIENT, return;
"ShardArchiveHandler",
[=, dstPath = std::move(dstPath), ptr = shared_from_this()](Job&) {
// If not synced then defer and retry // If not synced then defer and retry
auto const mode{ptr->app_.getOPs().getOperatingMode()}; auto const mode{app_.getOPs().getOperatingMode()};
if (mode != OperatingMode::FULL) if (mode != OperatingMode::FULL)
{ {
std::lock_guard lock(m_); std::lock_guard lock(m_);
@@ -384,21 +437,41 @@ ShardArchiveHandler::complete(path dstPath)
(static_cast<std::size_t>(OperatingMode::FULL) - (static_cast<std::size_t>(OperatingMode::FULL) -
static_cast<std::size_t>(mode)) * static_cast<std::size_t>(mode)) *
10)); 10));
timer_.async_wait(
[=, dstPath = std::move(dstPath), ptr = std::move(ptr)]( auto wrapper =
timerCounter_.wrap([=, dstPath = std::move(dstPath)](
boost::system::error_code const& ec) { boost::system::error_code const& ec) {
if (ec != boost::asio::error::operation_aborted) if (ec != boost::asio::error::operation_aborted)
ptr->complete(std::move(dstPath)); complete(std::move(dstPath));
}); });
if (!wrapper)
onClosureFailed(
"failed to wrap closure for operating mode timer", lock);
else
timer_.async_wait(*wrapper);
} }
else else
{ {
ptr->process(dstPath); process(dstPath);
std::lock_guard lock(m_); std::lock_guard lock(m_);
remove(lock); removeAndProceed(lock);
next(lock);
} }
}); });
if (!wrapper)
{
if (isStopping())
return;
JLOG(j_.error()) << "failed to wrap closure for process()";
std::lock_guard lock(m_);
removeAndProceed(lock);
}
// Process in another thread to not hold up the IO service
app_.getJobQueue().addJob(jtCLIENT, "ShardArchiveHandler", *wrapper);
} }
void void
@@ -441,8 +514,10 @@ ShardArchiveHandler::process(path const& dstPath)
} }
void void
ShardArchiveHandler::remove(std::lock_guard<std::mutex>&) ShardArchiveHandler::remove(std::lock_guard<std::mutex> const&)
{ {
verificationScheduler_.reset();
auto const shardIndex{archives_.begin()->first}; auto const shardIndex{archives_.begin()->first};
app_.getShardStore()->removePreShard(shardIndex); app_.getShardStore()->removePreShard(shardIndex);
archives_.erase(shardIndex); archives_.erase(shardIndex);
@@ -466,8 +541,6 @@ ShardArchiveHandler::remove(std::lock_guard<std::mutex>&)
void void
ShardArchiveHandler::doRelease(std::lock_guard<std::mutex> const&) ShardArchiveHandler::doRelease(std::lock_guard<std::mutex> const&)
{ {
process_ = false;
timer_.cancel(); timer_.cancel();
for (auto const& ar : archives_) for (auto const& ar : archives_)
app_.getShardStore()->removePreShard(ar.first); app_.getShardStore()->removePreShard(ar.first);
@@ -493,6 +566,32 @@ ShardArchiveHandler::doRelease(std::lock_guard<std::mutex> const&)
} }
downloader_.reset(); downloader_.reset();
process_ = false;
}
bool
ShardArchiveHandler::onClosureFailed(
std::string const& errorMsg,
std::lock_guard<std::mutex> const& lock)
{
if (isStopping())
return false;
JLOG(j_.error()) << errorMsg;
return removeAndProceed(lock);
}
bool
ShardArchiveHandler::removeAndProceed(std::lock_guard<std::mutex> const& lock)
{
remove(lock);
return next(lock);
}
RecoveryHandler::RecoveryHandler(Application& app, Stoppable& parent)
: ShardArchiveHandler(app, parent)
{
} }
} // namespace RPC } // namespace RPC

View File

@@ -18,36 +18,51 @@
//============================================================================== //==============================================================================
#include <ripple/app/ledger/LedgerMaster.h> #include <ripple/app/ledger/LedgerMaster.h>
#include <ripple/nodestore/RetryFinalize.h> #include <ripple/rpc/ShardVerificationScheduler.h>
namespace ripple { namespace ripple {
namespace NodeStore { namespace RPC {
ShardVerificationScheduler::ShardVerificationScheduler(
std::chrono::seconds retryInterval,
std::uint32_t maxAttempts)
: retryInterval_(
(retryInterval == std::chrono::seconds(0) ? defaultRetryInterval_
: retryInterval))
, maxAttempts_(maxAttempts == 0 ? defaultmaxAttempts_ : maxAttempts)
{
}
bool bool
RetryFinalize::retry( ShardVerificationScheduler::retry(
Application& app, Application& app,
retryFunction f, bool shouldHaveHash,
std::uint32_t shardIndex) retryFunction f)
{ {
if (numAttempts_ >= maxAttempts_) if (numAttempts_ >= maxAttempts_)
return false; return false;
// Retry attempts only count when we have a validated ledger // Retry attempts only count when we
if (app.getLedgerMaster().getValidatedLedger()) // have a validated ledger with a
// sequence later than the shard's
// last ledger.
if (shouldHaveHash)
++numAttempts_; ++numAttempts_;
if (!timer_) if (!timer_)
timer_ = std::make_unique<waitable_timer>(app.getIOService()); timer_ = std::make_unique<waitable_timer>(app.getIOService());
timer_->expires_from_now(retryInterval_); timer_->expires_from_now(retryInterval_);
timer_->async_wait([f{std::move(f)}, shardIndex_ = shardIndex]( timer_->async_wait(f);
boost::system::error_code const& ec) {
if (ec != boost::asio::error::operation_aborted)
f(shardIndex_);
});
return true; return true;
} }
} // namespace NodeStore void
ShardVerificationScheduler::reset()
{
numAttempts_ = 0;
}
} // namespace RPC
} // namespace ripple } // namespace ripple

View File

@@ -29,7 +29,7 @@
namespace ripple { namespace ripple {
namespace test { namespace test {
class SSLHTTPDownloader_test : public beast::unit_test::suite class DatabaseDownloader_test : public beast::unit_test::suite
{ {
TrustedPublisherServer TrustedPublisherServer
createServer(jtx::Env& env, bool ssl = true) createServer(jtx::Env& env, bool ssl = true)
@@ -80,7 +80,7 @@ class SSLHTTPDownloader_test : public beast::unit_test::suite
{ {
test::StreamSink sink_; test::StreamSink sink_;
beast::Journal journal_; beast::Journal journal_;
// The SSLHTTPDownloader must be created as shared_ptr // The DatabaseDownloader must be created as shared_ptr
// because it uses shared_from_this // because it uses shared_from_this
std::shared_ptr<DatabaseDownloader> ptr_; std::shared_ptr<DatabaseDownloader> ptr_;
@@ -265,6 +265,6 @@ public:
} }
}; };
BEAST_DEFINE_TESTSUITE(SSLHTTPDownloader, net, ripple); BEAST_DEFINE_TESTSUITE(DatabaseDownloader, net, ripple);
} // namespace test } // namespace test
} // namespace ripple } // namespace ripple

View File

@@ -34,7 +34,7 @@ namespace test {
class ShardArchiveHandler_test : public beast::unit_test::suite class ShardArchiveHandler_test : public beast::unit_test::suite
{ {
using Downloads = std::vector<std::pair<uint32_t, std::string>>; using Downloads = std::vector<std::pair<std::uint32_t, std::string>>;
TrustedPublisherServer TrustedPublisherServer
createServer(jtx::Env& env, bool ssl = true) createServer(jtx::Env& env, bool ssl = true)
@@ -49,12 +49,14 @@ class ShardArchiveHandler_test : public beast::unit_test::suite
} }
public: public:
// Test the shard downloading module by initiating
// and completing a download and verifying the
// contents of the state database.
void void
testStateDatabase1() testSingleDownloadAndStateDB()
{ {
testcase("testStateDatabase1"); testcase("testSingleDownloadAndStateDB");
{
beast::temp_dir tempDir; beast::temp_dir tempDir;
auto c = jtx::envconfig(); auto c = jtx::envconfig();
@@ -64,11 +66,9 @@ public:
c->setupControl(true, true, true); c->setupControl(true, true, true);
jtx::Env env(*this, std::move(c)); jtx::Env env(*this, std::move(c));
auto handler = RPC::ShardArchiveHandler::getInstance( auto handler = env.app().getShardArchiveHandler();
env.app(), env.app().getJobQueue());
BEAST_EXPECT(handler); BEAST_EXPECT(handler);
BEAST_EXPECT(dynamic_cast<RPC::RecoveryHandler*>(handler) == nullptr);
BEAST_EXPECT(handler->init());
std::string const rawUrl = "https://foo:443/1.tar.lz4"; std::string const rawUrl = "https://foo:443/1.tar.lz4";
parsedURL url; parsedURL url;
@@ -84,7 +84,7 @@ public:
soci::rowset<soci::row> rs = soci::rowset<soci::row> rs =
(session.prepare << "SELECT * FROM State;"); (session.prepare << "SELECT * FROM State;");
uint64_t rowCount = 0; std::uint64_t rowCount = 0;
for (auto it = rs.begin(); it != rs.end(); ++it, ++rowCount) for (auto it = rs.begin(); it != rs.end(); ++it, ++rowCount)
{ {
@@ -98,17 +98,14 @@ public:
handler->release(); handler->release();
} }
// Destroy the singleton so we start fresh in // Test the shard downloading module by initiating
// the next testcase. // and completing three downloads and verifying
RPC::ShardArchiveHandler::instance_.reset(); // the contents of the state database.
}
void void
testStateDatabase2() testDownloadsAndStateDB()
{ {
testcase("testStateDatabase2"); testcase("testDownloadsAndStateDB");
{
beast::temp_dir tempDir; beast::temp_dir tempDir;
auto c = jtx::envconfig(); auto c = jtx::envconfig();
@@ -118,11 +115,9 @@ public:
c->setupControl(true, true, true); c->setupControl(true, true, true);
jtx::Env env(*this, std::move(c)); jtx::Env env(*this, std::move(c));
auto handler = RPC::ShardArchiveHandler::getInstance( auto handler = env.app().getShardArchiveHandler();
env.app(), env.app().getJobQueue());
BEAST_EXPECT(handler); BEAST_EXPECT(handler);
BEAST_EXPECT(dynamic_cast<RPC::RecoveryHandler*>(handler) == nullptr);
BEAST_EXPECT(handler->init());
Downloads const dl = { Downloads const dl = {
{1, "https://foo:443/1.tar.lz4"}, {1, "https://foo:443/1.tar.lz4"},
@@ -143,7 +138,7 @@ public:
soci::rowset<soci::row> rs = soci::rowset<soci::row> rs =
(session.prepare << "SELECT * FROM State;"); (session.prepare << "SELECT * FROM State;");
uint64_t pos = 0; std::uint64_t pos = 0;
for (auto it = rs.begin(); it != rs.end(); ++it, ++pos) for (auto it = rs.begin(); it != rs.end(); ++it, ++pos)
{ {
BEAST_EXPECT(it->get<int>(0) == dl[pos].first); BEAST_EXPECT(it->get<int>(0) == dl[pos].first);
@@ -156,46 +151,56 @@ public:
handler->release(); handler->release();
} }
// Destroy the singleton so we start fresh in // Test the shard downloading module by initiating
// the next testcase. // and completing ten downloads and verifying the
RPC::ShardArchiveHandler::instance_.reset(); // contents of the filesystem and the handler's
} // archives.
void void
testStateDatabase3() testDownloadsAndFileSystem()
{ {
testcase("testStateDatabase3"); testcase("testDownloadsAndFileSystem");
{
beast::temp_dir tempDir; beast::temp_dir tempDir;
auto c = jtx::envconfig(); auto c = jtx::envconfig();
auto& section = c->section(ConfigSection::shardDatabase()); auto& section = c->section(ConfigSection::shardDatabase());
section.set("path", tempDir.path()); section.set("path", tempDir.path());
section.set("max_size_gb", "100"); section.set("max_size_gb", "100");
section.set("ledgers_per_shard", "256");
c->setupControl(true, true, true); c->setupControl(true, true, true);
jtx::Env env(*this, std::move(c)); jtx::Env env(*this, std::move(c));
auto handler = RPC::ShardArchiveHandler::getInstance(
env.app(), env.app().getJobQueue());
BEAST_EXPECT(handler);
BEAST_EXPECT(handler->init()); std::uint8_t const numberOfDownloads = 10;
// Create some ledgers so that the ShardArchiveHandler
// can verify the last ledger hash for the shard
// downloads.
for (int i = 0; i < env.app().getShardStore()->ledgersPerShard() *
(numberOfDownloads + 1);
++i)
{
env.close();
}
auto handler = env.app().getShardArchiveHandler();
BEAST_EXPECT(handler);
BEAST_EXPECT(dynamic_cast<RPC::RecoveryHandler*>(handler) == nullptr);
auto server = createServer(env); auto server = createServer(env);
auto host = server.local_endpoint().address().to_string(); auto host = server.local_endpoint().address().to_string();
auto port = std::to_string(server.local_endpoint().port()); auto port = std::to_string(server.local_endpoint().port());
server.stop(); server.stop();
Downloads const dl = [&host, &port] { Downloads const dl = [count = numberOfDownloads, &host, &port] {
Downloads ret; Downloads ret;
for (int i = 1; i <= 10; ++i) for (int i = 1; i <= count; ++i)
{ {
ret.push_back( ret.push_back(
{i, {i,
(boost::format("https://%s:%d/%d.tar.lz4") % host % (boost::format("https://%s:%d/%d.tar.lz4") % host % port %
port % i) i)
.str()}); .str()});
} }
@@ -211,34 +216,43 @@ public:
BEAST_EXPECT(handler->start()); BEAST_EXPECT(handler->start());
auto stateDir = RPC::ShardArchiveHandler::getDownloadDirectory( auto stateDir =
env.app().config()); RPC::ShardArchiveHandler::getDownloadDirectory(env.app().config());
std::unique_lock<std::mutex> lock(handler->m_); std::unique_lock<std::mutex> lock(handler->m_);
BEAST_EXPECT( BEAST_EXPECT(
boost::filesystem::exists(stateDir) || boost::filesystem::exists(stateDir) || handler->archives_.empty());
handler->archives_.empty());
using namespace std::chrono_literals;
auto waitMax = 60s;
while (!handler->archives_.empty()) while (!handler->archives_.empty())
{ {
lock.unlock(); lock.unlock();
std::this_thread::sleep_for(std::chrono::milliseconds(100)); std::this_thread::sleep_for(1s);
if (waitMax -= 1s; waitMax <= 0s)
{
BEAST_EXPECT(false);
break;
}
lock.lock(); lock.lock();
} }
BEAST_EXPECT(!boost::filesystem::exists(stateDir)); BEAST_EXPECT(!boost::filesystem::exists(stateDir));
} }
// Destroy the singleton so we start fresh in // Test the shard downloading module by initiating
// the next testcase. // and completing ten downloads and verifying the
RPC::ShardArchiveHandler::instance_.reset(); // contents of the filesystem and the handler's
} // archives. Then restart the application and ensure
// that the handler is created and started automatically.
void void
testStateDatabase4() testDownloadsAndRestart()
{ {
testcase("testStateDatabase4"); testcase("testDownloadsAndRestart");
beast::temp_dir tempDir; beast::temp_dir tempDir;
@@ -247,24 +261,37 @@ public:
auto& section = c->section(ConfigSection::shardDatabase()); auto& section = c->section(ConfigSection::shardDatabase());
section.set("path", tempDir.path()); section.set("path", tempDir.path());
section.set("max_size_gb", "100"); section.set("max_size_gb", "100");
section.set("ledgers_per_shard", "256");
c->setupControl(true, true, true); c->setupControl(true, true, true);
jtx::Env env(*this, std::move(c)); jtx::Env env(*this, std::move(c));
auto handler = RPC::ShardArchiveHandler::getInstance(
env.app(), env.app().getJobQueue());
BEAST_EXPECT(handler);
BEAST_EXPECT(handler->init()); std::uint8_t const numberOfDownloads = 10;
// Create some ledgers so that the ShardArchiveHandler
// can verify the last ledger hash for the shard
// downloads.
for (int i = 0; i < env.app().getShardStore()->ledgersPerShard() *
(numberOfDownloads + 1);
++i)
{
env.close();
}
auto handler = env.app().getShardArchiveHandler();
BEAST_EXPECT(handler);
BEAST_EXPECT(
dynamic_cast<RPC::RecoveryHandler*>(handler) == nullptr);
auto server = createServer(env); auto server = createServer(env);
auto host = server.local_endpoint().address().to_string(); auto host = server.local_endpoint().address().to_string();
auto port = std::to_string(server.local_endpoint().port()); auto port = std::to_string(server.local_endpoint().port());
server.stop(); server.stop();
Downloads const dl = [&host, &port] { Downloads const dl = [count = numberOfDownloads, &host, &port] {
Downloads ret; Downloads ret;
for (int i = 1; i <= 10; ++i) for (int i = 1; i <= count; ++i)
{ {
ret.push_back( ret.push_back(
{i, {i,
@@ -298,10 +325,20 @@ public:
boost::filesystem::exists(stateDir) || boost::filesystem::exists(stateDir) ||
handler->archives_.empty()); handler->archives_.empty());
using namespace std::chrono_literals;
auto waitMax = 60s;
while (!handler->archives_.empty()) while (!handler->archives_.empty())
{ {
lock.unlock(); lock.unlock();
std::this_thread::sleep_for(std::chrono::milliseconds(100)); std::this_thread::sleep_for(1s);
if (waitMax -= 1s; waitMax <= 0s)
{
BEAST_EXPECT(false);
break;
}
lock.lock(); lock.lock();
} }
@@ -314,24 +351,31 @@ public:
stateDir / stateDBName); stateDir / stateDBName);
} }
// Destroy the singleton so we start fresh in
// the new scope.
RPC::ShardArchiveHandler::instance_.reset();
auto c = jtx::envconfig(); auto c = jtx::envconfig();
auto& section = c->section(ConfigSection::shardDatabase()); auto& section = c->section(ConfigSection::shardDatabase());
section.set("path", tempDir.path()); section.set("path", tempDir.path());
section.set("max_size_gb", "100"); section.set("max_size_gb", "100");
section.set("ledgers_per_shard", "256");
section.set("shard_verification_retry_interval", "1");
section.set("shard_verification_max_attempts", "10000");
c->setupControl(true, true, true); c->setupControl(true, true, true);
jtx::Env env(*this, std::move(c)); jtx::Env env(*this, std::move(c));
while (!RPC::ShardArchiveHandler::hasInstance()) std::uint8_t const numberOfDownloads = 10;
std::this_thread::sleep_for(std::chrono::milliseconds(100));
BEAST_EXPECT(RPC::ShardArchiveHandler::hasInstance()); // Create some ledgers so that the ShardArchiveHandler
// can verify the last ledger hash for the shard
// downloads.
for (int i = 0; i < env.app().getShardStore()->ledgersPerShard() *
(numberOfDownloads + 1);
++i)
{
env.close();
}
auto handler = RPC::ShardArchiveHandler::getInstance(); auto handler = env.app().getShardArchiveHandler();
BEAST_EXPECT(dynamic_cast<RPC::RecoveryHandler*>(handler) != nullptr);
auto stateDir = auto stateDir =
RPC::ShardArchiveHandler::getDownloadDirectory(env.app().config()); RPC::ShardArchiveHandler::getDownloadDirectory(env.app().config());
@@ -341,10 +385,20 @@ public:
BEAST_EXPECT( BEAST_EXPECT(
boost::filesystem::exists(stateDir) || handler->archives_.empty()); boost::filesystem::exists(stateDir) || handler->archives_.empty());
using namespace std::chrono_literals;
auto waitMax = 60s;
while (!handler->archives_.empty()) while (!handler->archives_.empty())
{ {
lock.unlock(); lock.unlock();
std::this_thread::sleep_for(std::chrono::milliseconds(100)); std::this_thread::sleep_for(1s);
if (waitMax -= 1s; waitMax <= 0s)
{
BEAST_EXPECT(false);
break;
}
lock.lock(); lock.lock();
} }
@@ -354,10 +408,10 @@ public:
void void
run() override run() override
{ {
testStateDatabase1(); testSingleDownloadAndStateDB();
testStateDatabase2(); testDownloadsAndStateDB();
testStateDatabase3(); testDownloadsAndFileSystem();
testStateDatabase4(); testDownloadsAndRestart();
} }
}; };