Add shard thread safety

This commit is contained in:
Miguel Portilla
2019-08-09 11:57:07 -04:00
committed by Nik Bougalis
parent 66fad62e66
commit 22c9de487a
4 changed files with 424 additions and 385 deletions

View File

@@ -26,6 +26,8 @@
#include <ripple/nodestore/Manager.h>
#include <boost/algorithm/string.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include <fstream>
@@ -55,9 +57,8 @@ Shard::Shard(
bool
Shard::open(Scheduler& scheduler, nudb::context& ctx)
{
std::lock_guard lock(mutex_);
assert(!backend_);
using namespace boost::filesystem;
using namespace boost::beast::detail;
Config const& config {app_.config()};
Section section {config.section(ConfigSection::shardDatabase())};
@@ -108,7 +109,7 @@ Shard::open(Scheduler& scheduler, nudb::context& ctx)
if (!preexist)
{
// New shard, create a control file
if (!saveControl())
if (!saveControl(lock))
return fail({});
}
else if (is_regular_file(control_))
@@ -133,17 +134,19 @@ Shard::open(Scheduler& scheduler, nudb::context& ctx)
JLOG(j_.warn()) <<
"shard " << index_ <<
" has a control file for complete shard";
setComplete();
remove_all(control_);
setComplete(lock);
}
}
}
else
setComplete();
setComplete(lock);
setCache();
if (!initSQLite() || !setFileStats())
return fail({});
if (!complete_)
{
setCache(lock);
if (!initSQLite(lock) ||!setFileStats(lock))
return fail({});
}
}
catch (std::exception const& e)
{
@@ -157,7 +160,9 @@ Shard::open(Scheduler& scheduler, nudb::context& ctx)
bool
Shard::setStored(std::shared_ptr<Ledger const> const& ledger)
{
assert(backend_&& !complete_);
std::lock_guard lock(mutex_);
assert(backend_ && !complete_);
if (boost::icl::contains(storedSeqs_, ledger->info().seq))
{
JLOG(j_.debug()) <<
@@ -166,27 +171,16 @@ Shard::setStored(std::shared_ptr<Ledger const> const& ledger)
return false;
}
if (!setSQLiteStored(ledger))
if (!setSQLiteStored(ledger, lock))
return false;
// Check if the shard is complete
if (boost::icl::length(storedSeqs_) >= maxLedgers_ - 1)
{
setComplete();
if (backend_->backed())
{
if (!removeAll(control_, j_))
return false;
setCache();
if (!initSQLite() || !setFileStats())
return false;
}
}
setComplete(lock);
else
{
storedSeqs_.insert(ledger->info().seq);
if (backend_->backed() && !saveControl())
if (backend_->backed() && !saveControl(lock))
return false;
}
@@ -202,7 +196,9 @@ Shard::setStored(std::shared_ptr<Ledger const> const& ledger)
boost::optional<std::uint32_t>
Shard::prepare()
{
assert(backend_);
std::lock_guard lock(mutex_);
assert(backend_ && !complete_);
if (storedSeqs_.empty())
return lastSeq_;
return prevMissing(storedSeqs_, 1 + lastSeq_, firstSeq_);
@@ -211,33 +207,102 @@ Shard::prepare()
bool
Shard::contains(std::uint32_t seq) const
{
assert(backend_);
if (seq < firstSeq_ || seq > lastSeq_)
return false;
if (complete_)
return true;
return boost::icl::contains(storedSeqs_, seq);
std::lock_guard lock(mutex_);
assert(backend_);
return complete_ || boost::icl::contains(storedSeqs_, seq);
}
void
Shard::sweep()
{
assert(backend_);
std::lock_guard lock(mutex_);
assert(pCache_ && nCache_);
pCache_->sweep();
nCache_->sweep();
}
std::shared_ptr<Backend> const&
Shard::getBackend() const
{
std::lock_guard lock(mutex_);
assert(backend_);
return backend_;
}
bool
Shard::validate()
Shard::complete() const
{
std::lock_guard lock(mutex_);
assert(backend_);
return complete_;
}
std::shared_ptr<PCache>
Shard::pCache() const
{
std::lock_guard lock(mutex_);
assert(pCache_);
return pCache_;
}
std::shared_ptr<NCache>
Shard::nCache() const
{
std::lock_guard lock(mutex_);
assert(nCache_);
return nCache_;
}
std::uint64_t
Shard::fileSize() const
{
std::lock_guard lock(mutex_);
assert(backend_);
return fileSz_;
}
std::uint32_t
Shard::fdRequired() const
{
std::lock_guard lock(mutex_);
assert(backend_);
return fdRequired_;
}
std::shared_ptr<Ledger const>
Shard::lastStored() const
{
std::lock_guard lock(mutex_);
assert(backend_);
return lastStored_;
}
bool
Shard::validate() const
{
uint256 hash;
std::uint32_t seq;
std::shared_ptr<Ledger> ledger;
auto fail = [this](std::string const& msg)
std::uint32_t seq {0};
auto fail = [j = j_, index = index_, &hash, &seq](std::string const& msg)
{
JLOG(j_.error()) << "shard " << index_ << " " << msg;
JLOG(j.error()) <<
"shard " << index << ". " << msg <<
(hash.isZero() ? "" : ". Ledger hash " + to_string(hash)) <<
(seq == 0 ? "" : ". Ledger sequence " + std::to_string(seq));
return false;
};
std::shared_ptr<Ledger> ledger;
// Find the hash of the last ledger in this shard
{
@@ -245,13 +310,13 @@ Shard::validate()
"WHERE LedgerSeq >= " + std::to_string(lastSeq_) +
" order by LedgerSeq desc limit 1", app_, false);
if (!ledger)
return fail("is unable to validate due to lacking lookup data");
return fail("Unable to validate due to lacking lookup data");
if (seq != lastSeq_)
{
ledger->setImmutable(app_.config());
boost::optional<uint256> h;
ledger->setImmutable(app_.config());
try
{
h = hashOfSeq(*ledger, lastSeq_, j_);
@@ -263,215 +328,92 @@ Shard::validate()
}
if (!h)
{
return fail("is missing hash for last ledger sequence " +
std::to_string(lastSeq_));
}
return fail("Missing hash for last ledger sequence");
hash = *h;
seq = lastSeq_;
}
}
JLOG(j_.debug()) <<
"shard " << index_ <<
" has ledger sequences " << firstSeq_ << "-" << lastSeq_;
// Use a short age to keep memory consumption low
auto const savedAge {pCache_->getTargetAge()};
using namespace std::chrono_literals;
pCache_->setTargetAge(1s);
// Validate every ledger stored in this shard
std::shared_ptr<Ledger const> next;
while (seq >= firstSeq_)
{
auto nObj = valFetch(hash);
if (!nObj)
break;
return fail("Invalid ledger");
ledger = std::make_shared<Ledger>(
InboundLedger::deserializeHeader(makeSlice(nObj->getData()),
true), app_.config(), *app_.shardFamily());
InboundLedger::deserializeHeader(makeSlice(nObj->getData()), true),
app_.config(),
*app_.shardFamily());
if (ledger->info().seq != seq)
{
fail("encountered invalid ledger sequence " + std::to_string(seq));
break;
}
return fail("Invalid ledger header sequence");
if (ledger->info().hash != hash)
{
fail("encountered invalid ledger hash " + to_string(hash) +
" on sequence " + std::to_string(seq));
break;
}
return fail("Invalid ledger header hash");
ledger->stateMap().setLedgerSeq(seq);
ledger->txMap().setLedgerSeq(seq);
ledger->setImmutable(app_.config());
if (!ledger->stateMap().fetchRoot(
SHAMapHash {ledger->info().accountHash}, nullptr))
{
fail("is missing root STATE node on sequence " +
std::to_string(seq));
break;
return fail("Missing root STATE node");
}
if (ledger->info().txHash.isNonZero())
if (ledger->info().txHash.isNonZero() &&
!ledger->txMap().fetchRoot(
SHAMapHash {ledger->info().txHash},
nullptr))
{
if (!ledger->txMap().fetchRoot(
SHAMapHash {ledger->info().txHash}, nullptr))
{
fail("is missing root TXN node on sequence " +
std::to_string(seq));
break;
}
return fail("Missing root TXN node");
}
if (!valLedger(ledger, next))
break;
return false;
hash = ledger->info().parentHash;
--seq;
next = ledger;
if (seq % 128 == 0)
pCache_->sweep();
}
pCache_->reset();
nCache_->reset();
pCache_->setTargetAge(savedAge);
if (seq >= firstSeq_)
{
return fail(std::string(" is ") +
(complete_ ? "invalid, failed" : "incomplete, stopped") +
" on hash " + to_string(hash) + " on sequence " +
std::to_string(seq));
std::lock_guard lock(mutex_);
pCache_->reset();
nCache_->reset();
}
JLOG(j_.debug()) <<
"shard " << index_ << " is valid and complete";
JLOG(j_.debug()) << "shard " << index_ << " is valid";
return true;
}
bool
Shard::valLedger(std::shared_ptr<Ledger const> const& ledger,
std::shared_ptr<Ledger const> const& next)
Shard::setComplete(std::lock_guard<std::mutex> const& lock)
{
auto fail = [this](std::string const& msg)
{
JLOG(j_.error()) << "shard " << index_ << " " << msg;
return false;
};
if (ledger->info().hash.isZero())
{
return fail("encountered a zero ledger hash on sequence " +
std::to_string(ledger->info().seq));
}
if (ledger->info().accountHash.isZero())
{
return fail("encountered a zero account hash on sequence " +
std::to_string(ledger->info().seq));
}
bool error {false};
auto f = [this, &error](SHAMapAbstractNode& node)
{
if (!valFetch(node.getNodeHash().as_uint256()))
error = true;
return !error;
};
// Validate the state map
if (ledger->stateMap().getHash().isNonZero())
{
if (!ledger->stateMap().isValid())
{
return fail("has an invalid state map on sequence " +
std::to_string(ledger->info().seq));
}
try
{
if (next && next->info().parentHash == ledger->info().hash)
ledger->stateMap().visitDifferences(&next->stateMap(), f);
else
ledger->stateMap().visitNodes(f);
}
catch (std::exception const& e)
{
return fail(std::string("exception ") +
e.what() + " in function " + __func__);
}
if (error)
return false;
}
// Validate the transaction map
if (ledger->info().txHash.isNonZero())
{
if (!ledger->txMap().isValid())
{
return fail("has an invalid transaction map on sequence " +
std::to_string(ledger->info().seq));
}
try
{
ledger->txMap().visitNodes(f);
}
catch (std::exception const& e)
{
return fail(std::string("exception ") +
e.what() + " in function " + __func__);
}
if (error)
return false;
}
return true;
};
std::shared_ptr<NodeObject>
Shard::valFetch(uint256 const& hash)
{
assert(backend_);
std::shared_ptr<NodeObject> nObj;
auto fail = [this](std::string const& msg)
{
JLOG(j_.error()) << "shard " << index_ << " " << msg;
};
// Remove the control file if one exists
try
{
switch (backend_->fetch(hash.begin(), &nObj))
{
case ok:
break;
case notFound:
{
fail("is missing node object on hash " + to_string(hash));
break;
}
case dataCorrupt:
{
fail("has a corrupt node object on hash " + to_string(hash));
break;
}
default:
fail("encountered unknown error on hash " + to_string(hash));
}
using namespace boost::filesystem;
if (is_regular_file(control_))
remove_all(control_);
}
catch (std::exception const& e)
{
fail(std::string("exception ") +
e.what() + " in function " + __func__);
JLOG(j_.error()) <<
"shard " << index_ <<
" exception " << e.what() <<
" in function " << __func__;
return false;
}
return nObj;
}
void
Shard::setComplete()
{
storedSeqs_.clear();
complete_ = true;
setCache(lock);
return initSQLite(lock) && setFileStats(lock);
}
void
Shard::setCache()
Shard::setCache(std::lock_guard<std::mutex> const&)
{
// complete shards use the smallest cache and
// fastest expiration to reduce memory consumption.
@@ -503,7 +445,7 @@ Shard::setCache()
}
bool
Shard::initSQLite()
Shard::initSQLite(std::lock_guard<std::mutex> const&)
{
Config const& config {app_.config()};
DatabaseCon::Setup setup;
@@ -515,9 +457,8 @@ Shard::initSQLite()
{
if (complete_)
{
using namespace boost::filesystem;
// Remove WAL files if they exist
using namespace boost::filesystem;
for (auto const& d : directory_iterator(dir_))
{
if (is_regular_file(d) &&
@@ -601,7 +542,9 @@ Shard::initSQLite()
}
bool
Shard::setSQLiteStored(std::shared_ptr<Ledger const> const& ledger)
Shard::setSQLiteStored(
std::shared_ptr<Ledger const> const& ledger,
std::lock_guard<std::mutex> const&)
{
auto const seq {ledger->info().seq};
assert(backend_ && !complete_);
@@ -733,7 +676,7 @@ Shard::setSQLiteStored(std::shared_ptr<Ledger const> const& ledger)
}
bool
Shard::setFileStats()
Shard::setFileStats(std::lock_guard<std::mutex> const&)
{
fileSz_ = 0;
fdRequired_ = 0;
@@ -764,7 +707,7 @@ Shard::setFileStats()
}
bool
Shard::saveControl()
Shard::saveControl(std::lock_guard<std::mutex> const&)
{
std::ofstream ofs {control_.string(), std::ios::trunc};
if (!ofs.is_open())
@@ -779,5 +722,120 @@ Shard::saveControl()
return true;
}
bool
Shard::valLedger(
std::shared_ptr<Ledger const> const& ledger,
std::shared_ptr<Ledger const> const& next) const
{
auto fail = [j = j_, index = index_, &ledger](std::string const& msg)
{
JLOG(j.error()) <<
"shard " << index << ". " << msg <<
(ledger->info().hash.isZero() ?
"" : ". Ledger header hash " +
to_string(ledger->info().hash)) <<
(ledger->info().seq == 0 ?
"" : ". Ledger header sequence " +
std::to_string(ledger->info().seq));
return false;
};
if (ledger->info().hash.isZero())
return fail("Invalid ledger header hash");
if (ledger->info().accountHash.isZero())
return fail("Invalid ledger header account hash");
bool error {false};
auto visit = [this, &error](SHAMapAbstractNode& node)
{
if (!valFetch(node.getNodeHash().as_uint256()))
error = true;
return !error;
};
// Validate the state map
if (ledger->stateMap().getHash().isNonZero())
{
if (!ledger->stateMap().isValid())
return fail("Invalid state map");
try
{
if (next && next->info().parentHash == ledger->info().hash)
ledger->stateMap().visitDifferences(&next->stateMap(), visit);
else
ledger->stateMap().visitNodes(visit);
}
catch (std::exception const& e)
{
return fail(std::string("exception ") +
e.what() + " in function " + __func__);
}
if (error)
return fail("Invalid state map");
}
// Validate the transaction map
if (ledger->info().txHash.isNonZero())
{
if (!ledger->txMap().isValid())
return fail("Invalid transaction map");
try
{
ledger->txMap().visitNodes(visit);
}
catch (std::exception const& e)
{
return fail(std::string("exception ") +
e.what() + " in function " + __func__);
}
if (error)
return fail("Invalid transaction map");
}
return true;
};
std::shared_ptr<NodeObject>
Shard::valFetch(uint256 const& hash) const
{
std::shared_ptr<NodeObject> nObj;
auto fail = [j = j_, index = index_, &hash, &nObj](std::string const& msg)
{
JLOG(j.error()) <<
"shard " << index << ". " << msg <<
". Node object hash " << to_string(hash);
nObj.reset();
return nObj;
};
Status status;
try
{
{
std::lock_guard lock(mutex_);
status = backend_->fetch(hash.begin(), &nObj);
}
switch (status)
{
case ok:
break;
case notFound:
return fail("Missing node object");
case dataCorrupt:
return fail("Corrupt node object");
default:
return fail("Unknown error");
}
}
catch (std::exception const& e)
{
return fail(std::string("exception ") +
e.what() + " in function " + __func__);
}
return nObj;
}
} // NodeStore
} // ripple