refactor: Change recursive_mutex to mutex in DatabaseRotatingImp (#5276)

Rewrites the code so that the lock is not held during the callback. Instead it locks twice, once before, and once after. This is safe due to the structure of the code, but is checked after the second lock. This allows mutex_ to be changed back to a regular mutex.
This commit is contained in:
Ed Hennis
2025-02-13 17:32:37 -05:00
committed by GitHub
parent 97e3dae6f4
commit 01fe9477f4
6 changed files with 172 additions and 27 deletions

View File

@@ -19,6 +19,7 @@ test.app > xrpl.basics
test.app > xrpld.app test.app > xrpld.app
test.app > xrpld.core test.app > xrpld.core
test.app > xrpld.ledger test.app > xrpld.ledger
test.app > xrpld.nodestore
test.app > xrpld.overlay test.app > xrpld.overlay
test.app > xrpld.rpc test.app > xrpld.rpc
test.app > xrpl.json test.app > xrpl.json

View File

@@ -20,9 +20,11 @@
#include <test/jtx.h> #include <test/jtx.h>
#include <test/jtx/envconfig.h> #include <test/jtx/envconfig.h>
#include <xrpld/app/main/Application.h> #include <xrpld/app/main/Application.h>
#include <xrpld/app/main/NodeStoreScheduler.h>
#include <xrpld/app/misc/SHAMapStore.h> #include <xrpld/app/misc/SHAMapStore.h>
#include <xrpld/app/rdb/backend/SQLiteDatabase.h> #include <xrpld/app/rdb/backend/SQLiteDatabase.h>
#include <xrpld/core/ConfigSections.h> #include <xrpld/core/ConfigSections.h>
#include <xrpld/nodestore/detail/DatabaseRotatingImp.h>
#include <xrpl/protocol/jss.h> #include <xrpl/protocol/jss.h>
namespace ripple { namespace ripple {
@@ -518,12 +520,136 @@ public:
lastRotated = ledgerSeq - 1; lastRotated = ledgerSeq - 1;
} }
std::unique_ptr<NodeStore::Backend>
makeBackendRotating(
jtx::Env& env,
NodeStoreScheduler& scheduler,
std::string path)
{
Section section{
env.app().config().section(ConfigSection::nodeDatabase())};
boost::filesystem::path newPath;
if (!BEAST_EXPECT(path.size()))
return {};
newPath = path;
section.set("path", newPath.string());
auto backend{NodeStore::Manager::instance().make_Backend(
section,
megabytes(env.app().config().getValueFor(
SizedItem::burstSize, std::nullopt)),
scheduler,
env.app().logs().journal("NodeStoreTest"))};
backend->open();
return backend;
}
void
testRotate()
{
// The only purpose of this test is to ensure that if something that
// should never happen happens, we don't get a deadlock.
testcase("rotate with lock contention");
using namespace jtx;
Env env(*this, envconfig(onlineDelete));
/////////////////////////////////////////////////////////////
// Create the backend. Normally, SHAMapStoreImp handles all these
// details
auto nscfg = env.app().config().section(ConfigSection::nodeDatabase());
// Provide default values:
if (!nscfg.exists("cache_size"))
nscfg.set(
"cache_size",
std::to_string(env.app().config().getValueFor(
SizedItem::treeCacheSize, std::nullopt)));
if (!nscfg.exists("cache_age"))
nscfg.set(
"cache_age",
std::to_string(env.app().config().getValueFor(
SizedItem::treeCacheAge, std::nullopt)));
NodeStoreScheduler scheduler(env.app().getJobQueue());
std::string const writableDb = "write";
std::string const archiveDb = "archive";
auto writableBackend = makeBackendRotating(env, scheduler, writableDb);
auto archiveBackend = makeBackendRotating(env, scheduler, archiveDb);
// Create NodeStore with two backends to allow online deletion of
// data
constexpr int readThreads = 4;
auto dbr = std::make_unique<NodeStore::DatabaseRotatingImp>(
scheduler,
readThreads,
std::move(writableBackend),
std::move(archiveBackend),
nscfg,
env.app().logs().journal("NodeStoreTest"));
/////////////////////////////////////////////////////////////
// Check basic functionality
using namespace std::chrono_literals;
std::atomic<int> threadNum = 0;
{
auto newBackend = makeBackendRotating(
env, scheduler, std::to_string(++threadNum));
auto const cb = [&](std::string const& writableName,
std::string const& archiveName) {
BEAST_EXPECT(writableName == "1");
BEAST_EXPECT(archiveName == "write");
// Ensure that dbr functions can be called from within the
// callback
BEAST_EXPECT(dbr->getName() == "1");
};
dbr->rotate(std::move(newBackend), cb);
}
BEAST_EXPECT(threadNum == 1);
BEAST_EXPECT(dbr->getName() == "1");
/////////////////////////////////////////////////////////////
// Do something stupid. Try to re-enter rotate from inside the callback.
{
auto const cb = [&](std::string const& writableName,
std::string const& archiveName) {
BEAST_EXPECT(writableName == "3");
BEAST_EXPECT(archiveName == "2");
// Ensure that dbr functions can be called from within the
// callback
BEAST_EXPECT(dbr->getName() == "3");
};
auto const cbReentrant = [&](std::string const& writableName,
std::string const& archiveName) {
BEAST_EXPECT(writableName == "2");
BEAST_EXPECT(archiveName == "1");
auto newBackend = makeBackendRotating(
env, scheduler, std::to_string(++threadNum));
// Reminder: doing this is stupid and should never happen
dbr->rotate(std::move(newBackend), cb);
};
auto newBackend = makeBackendRotating(
env, scheduler, std::to_string(++threadNum));
dbr->rotate(std::move(newBackend), cbReentrant);
}
BEAST_EXPECT(threadNum == 3);
BEAST_EXPECT(dbr->getName() == "3");
}
void void
run() override run() override
{ {
testClear(); testClear();
testAutomatic(); testAutomatic();
testCanDelete(); testCanDelete();
testRotate();
} }
}; };

View File

@@ -366,17 +366,17 @@ SHAMapStoreImp::run()
lastRotated = validatedSeq; lastRotated = validatedSeq;
dbRotating_->rotateWithLock( dbRotating_->rotate(
[&](std::string const& writableBackendName) { std::move(newBackend),
[&](std::string const& writableName,
std::string const& archiveName) {
SavedState savedState; SavedState savedState;
savedState.writableDb = newBackend->getName(); savedState.writableDb = writableName;
savedState.archiveDb = writableBackendName; savedState.archiveDb = archiveName;
savedState.lastRotated = lastRotated; savedState.lastRotated = lastRotated;
state_db_.setState(savedState); state_db_.setState(savedState);
clearCaches(validatedSeq); clearCaches(validatedSeq);
return std::move(newBackend);
}); });
JLOG(journal_.warn()) << "finished rotation " << validatedSeq; JLOG(journal_.warn()) << "finished rotation " << validatedSeq;

View File

@@ -44,11 +44,17 @@ public:
/** Rotates the backends. /** Rotates the backends.
@param f A function executed before the rotation and under the same lock @param newBackend New writable backend
@param f A function executed after the rotation outside of lock. The
values passed to f will be the new backend database names _after_
rotation.
*/ */
virtual void virtual void
rotateWithLock(std::function<std::unique_ptr<NodeStore::Backend>( rotate(
std::string const& writableBackendName)> const& f) = 0; std::unique_ptr<NodeStore::Backend>&& newBackend,
std::function<void(
std::string const& writableName,
std::string const& archiveName)> const& f) = 0;
}; };
} // namespace NodeStore } // namespace NodeStore

View File

@@ -41,18 +41,34 @@ DatabaseRotatingImp::DatabaseRotatingImp(
} }
void void
DatabaseRotatingImp::rotateWithLock( DatabaseRotatingImp::rotate(
std::function<std::unique_ptr<NodeStore::Backend>( std::unique_ptr<NodeStore::Backend>&& newBackend,
std::string const& writableBackendName)> const& f) std::function<void(
std::string const& writableName,
std::string const& archiveName)> const& f)
{
// Pass these two names to the callback function
std::string const newWritableBackendName = newBackend->getName();
std::string newArchiveBackendName;
// Hold on to current archive backend pointer until after the
// callback finishes. Only then will the archive directory be
// deleted.
std::shared_ptr<NodeStore::Backend> oldArchiveBackend;
{ {
std::lock_guard lock(mutex_); std::lock_guard lock(mutex_);
auto newBackend = f(writableBackend_->getName());
archiveBackend_->setDeletePath(); archiveBackend_->setDeletePath();
oldArchiveBackend = std::move(archiveBackend_);
archiveBackend_ = std::move(writableBackend_); archiveBackend_ = std::move(writableBackend_);
newArchiveBackendName = archiveBackend_->getName();
writableBackend_ = std::move(newBackend); writableBackend_ = std::move(newBackend);
} }
f(newWritableBackendName, newArchiveBackendName);
}
std::string std::string
DatabaseRotatingImp::getName() const DatabaseRotatingImp::getName() const
{ {

View File

@@ -49,9 +49,11 @@ public:
} }
void void
rotateWithLock( rotate(
std::function<std::unique_ptr<NodeStore::Backend>( std::unique_ptr<NodeStore::Backend>&& newBackend,
std::string const& writableBackendName)> const& f) override; std::function<void(
std::string const& writableName,
std::string const& archiveName)> const& f) override;
std::string std::string
getName() const override; getName() const override;
@@ -82,13 +84,7 @@ public:
private: private:
std::shared_ptr<Backend> writableBackend_; std::shared_ptr<Backend> writableBackend_;
std::shared_ptr<Backend> archiveBackend_; std::shared_ptr<Backend> archiveBackend_;
// This needs to be a recursive mutex because callbacks in `rotateWithLock` mutable std::mutex mutex_;
// can call function that also lock the mutex. A current example of this is
// a callback from SHAMapStoreImp, which calls `clearCaches`. This
// `clearCaches` call eventually calls `fetchNodeObject` which tries to
// relock the mutex. It would be desirable to rewrite the code so the lock
// was not held during a callback.
mutable std::recursive_mutex mutex_;
std::shared_ptr<NodeObject> std::shared_ptr<NodeObject>
fetchNodeObject( fetchNodeObject(