NetworkOPs isn't stopped() until Jobs done (RIPD-1356):

A new JobCounter class is introduced.  The JobCounter keeps
a reference count of Jobs in flight to the JobQueue.  When
NetworkOPs needs to stop, in addition to other work, it calls
JobCounter::join(), which waits until all Jobs in flight
have been destroyed before returning.  This ensures that all
NetworkOPs Jobs are completed before NetworkOPs declares
itself stopped().

Also, once a JobCounter is join()ed, it refuses to produce
more counted Jobs for the JobQueue.  So, once all old Jobs
in flight are done, then NetworkOPs will add no additional
Jobs to the JobQueue.

Other classes besides NetworkOPs should also be able to use
JobCounter.  NetworkOPs is a first test case.

Also, unneeded #includes were removed from files that were
touched for other reasons.
This commit is contained in:
Scott Schurr
2017-02-24 16:40:23 -08:00
committed by Brad Chase
parent 1bb92d40aa
commit c453df927f
7 changed files with 391 additions and 43 deletions

View File

@@ -1935,6 +1935,8 @@
</ClInclude>
<ClInclude Include="..\..\src\ripple\core\Job.h">
</ClInclude>
<ClInclude Include="..\..\src\ripple\core\JobCounter.h">
</ClInclude>
<ClInclude Include="..\..\src\ripple\core\JobQueue.h">
</ClInclude>
<ClInclude Include="..\..\src\ripple\core\JobTypeData.h">
@@ -4417,6 +4419,10 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='debug|x64'">True</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='release|x64'">True</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\test\core\JobCounter_test.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='debug|x64'">True</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='release|x64'">True</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="..\..\src\test\core\SociDB_test.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='debug|x64'">True</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='release|x64'">True</ExcludedFromBuild>

View File

@@ -2532,6 +2532,9 @@
<ClInclude Include="..\..\src\ripple\core\Job.h">
<Filter>ripple\core</Filter>
</ClInclude>
<ClInclude Include="..\..\src\ripple\core\JobCounter.h">
<Filter>ripple\core</Filter>
</ClInclude>
<ClInclude Include="..\..\src\ripple\core\JobQueue.h">
<Filter>ripple\core</Filter>
</ClInclude>
@@ -5160,6 +5163,9 @@
<ClCompile Include="..\..\src\test\core\DeadlineTimer_test.cpp">
<Filter>test\core</Filter>
</ClCompile>
<ClCompile Include="..\..\src\test\core\JobCounter_test.cpp">
<Filter>test\core</Filter>
</ClCompile>
<ClCompile Include="..\..\src\test\core\SociDB_test.cpp">
<Filter>test\core</Filter>
</ClCompile>

View File

@@ -19,13 +19,9 @@
#include <BeastConfig.h>
#include <ripple/app/misc/NetworkOPs.h>
#include <ripple/protocol/Quality.h>
#include <ripple/core/DatabaseCon.h>
#include <ripple/consensus/Consensus.h>
#include <ripple/app/main/Application.h>
#include <ripple/app/consensus/RCLConsensus.h>
#include <ripple/app/ledger/AcceptedLedger.h>
#include <ripple/app/ledger/InboundLedger.h>
#include <ripple/app/ledger/InboundLedgers.h>
#include <ripple/app/ledger/LedgerMaster.h>
#include <ripple/consensus/LedgerTiming.h>
@@ -39,36 +35,21 @@
#include <ripple/app/misc/LoadFeeTrack.h>
#include <ripple/app/misc/Transaction.h>
#include <ripple/app/misc/TxQ.h>
#include <ripple/app/misc/Validations.h>
#include <ripple/app/misc/ValidatorList.h>
#include <ripple/app/misc/impl/AccountTxPaging.h>
#include <ripple/app/tx/apply.h>
#include <ripple/basics/contract.h>
#include <ripple/basics/Log.h>
#include <ripple/basics/mulDiv.h>
#include <ripple/basics/random.h>
#include <ripple/protocol/digest.h>
#include <ripple/basics/StringUtilities.h>
#include <ripple/basics/UptimeTimer.h>
#include <ripple/protocol/JsonFields.h>
#include <ripple/core/Config.h>
#include <ripple/core/ConfigSections.h>
#include <ripple/core/DeadlineTimer.h>
#include <ripple/core/TimeKeeper.h>
#include <ripple/core/JobCounter.h>
#include <ripple/crypto/csprng.h>
#include <ripple/crypto/RFC1751.h>
#include <ripple/json/to_string.h>
#include <ripple/overlay/ClusterNode.h>
#include <ripple/overlay/Cluster.h>
#include <ripple/overlay/Overlay.h>
#include <ripple/overlay/predicates.h>
#include <ripple/protocol/BuildInfo.h>
#include <ripple/protocol/Feature.h>
#include <ripple/protocol/HashPrefix.h>
#include <ripple/protocol/Indexes.h>
#include <ripple/protocol/Rate.h>
#include <ripple/resource/Fees.h>
#include <ripple/resource/Gossip.h>
#include <ripple/resource/ResourceManager.h>
#include <ripple/beast/rfc2616.h>
#include <ripple/beast/core/LexicalCast.h>
@@ -76,11 +57,6 @@
#include <ripple/beast/utility/rngfill.h>
#include <ripple/basics/make_lock.h>
#include <beast/core/detail/base64.hpp>
#include <boost/optional.hpp>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <tuple>
namespace ripple {
@@ -237,7 +213,10 @@ public:
{
}
~NetworkOPsImp() override = default;
~NetworkOPsImp() override
{
jobCounter_.join();
}
public:
OperatingMode getOperatingMode () const override
@@ -508,6 +487,9 @@ public:
m_heartbeatTimer.cancel();
m_clusterTimer.cancel();
// Wait until all our in-flight Jobs are completed.
jobCounter_.join();
stopped ();
}
@@ -559,6 +541,7 @@ private:
DeadlineTimer m_heartbeatTimer;
DeadlineTimer m_clusterTimer;
JobCounter jobCounter_;
std::shared_ptr<RCLConsensus> mConsensus;
@@ -668,13 +651,15 @@ void NetworkOPsImp::onDeadlineTimer (DeadlineTimer& timer)
{
if (timer == m_heartbeatTimer)
{
m_job_queue.addJob (jtNETOP_TIMER, "NetOPs.heartbeat",
[this] (Job&) { processHeartbeatTimer(); });
m_job_queue.addCountedJob (
jtNETOP_TIMER, "NetOPs.heartbeat", jobCounter_,
[this] (Job&) { processHeartbeatTimer(); });
}
else if (timer == m_clusterTimer)
{
m_job_queue.addJob (jtNETOP_CLUSTER, "NetOPs.cluster",
[this] (Job&) { processClusterTimer(); });
m_job_queue.addCountedJob (
jtNETOP_CLUSTER, "NetOPs.cluster", jobCounter_,
[this] (Job&) { processClusterTimer(); });
}
}
@@ -740,6 +725,7 @@ void NetworkOPsImp::processClusterTimer ()
if (!update)
{
JLOG(m_journal.debug()) << "Too soon to send cluster update";
setClusterTimer ();
return;
}
@@ -841,10 +827,12 @@ void NetworkOPsImp::submitTransaction (std::shared_ptr<STTx const> const& iTrans
auto tx = std::make_shared<Transaction> (
trans, reason, app_);
m_job_queue.addJob (jtTRANSACTION, "submitTxn", [this, tx] (Job&) {
auto t = tx;
processTransaction(t, false, false, FailHard::no);
});
m_job_queue.addCountedJob (
jtTRANSACTION, "submitTxn", jobCounter_,
[this, tx] (Job&) {
auto t = tx;
processTransaction(t, false, false, FailHard::no);
});
}
void NetworkOPsImp::processTransaction (std::shared_ptr<Transaction>& transaction,
@@ -906,9 +894,12 @@ void NetworkOPsImp::doTransactionAsync (std::shared_ptr<Transaction> transaction
if (mDispatchState == DispatchState::none)
{
m_job_queue.addJob (jtBATCH, "transactionBatch",
[this] (Job&) { transactionBatch(); });
mDispatchState = DispatchState::scheduled;
if (m_job_queue.addCountedJob (
jtBATCH, "transactionBatch", jobCounter_,
[this] (Job&) { transactionBatch(); }))
{
mDispatchState = DispatchState::scheduled;
}
}
}
@@ -938,9 +929,12 @@ void NetworkOPsImp::doTransactionSync (std::shared_ptr<Transaction> transaction,
if (mTransactions.size())
{
// More transactions need to be applied, but by another job.
m_job_queue.addJob (jtBATCH, "transactionBatch",
[this] (Job&) { transactionBatch(); });
mDispatchState = DispatchState::scheduled;
if (m_job_queue.addCountedJob (
jtBATCH, "transactionBatch", jobCounter_,
[this] (Job&) { transactionBatch(); }))
{
mDispatchState = DispatchState::scheduled;
}
}
}
}
@@ -2460,12 +2454,12 @@ void NetworkOPsImp::reportFeeChange ()
app_.getTxQ().getMetrics(*app_.openLedger().current()),
app_.getFeeTrack()};
// only schedule the job if something has changed
if (f != mLastFeeSummary)
{
m_job_queue.addJob ( jtCLIENT, "reportFeeChange->pubServer",
[this] (Job&) { pubServer(); });
m_job_queue.addCountedJob (
jtCLIENT, "reportFeeChange->pubServer", jobCounter_,
[this] (Job&) { pubServer(); });
}
}

View File

@@ -0,0 +1,191 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2017 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef RIPPLE_CORE_JOB_COUNTER_H_INCLUDED
#define RIPPLE_CORE_JOB_COUNTER_H_INCLUDED
#include <ripple/core/Job.h>
#include <boost/optional.hpp>
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <type_traits>
namespace ripple {
// A class that does reference counting for Jobs. The reference counting
// allows a Stoppable to assure that all child Jobs in the JobQueue are
// completed before the Stoppable declares itself stopped().
class JobCounter
{
private:
    std::mutex mutable mutex_ {};
    std::condition_variable allJobsDoneCond_ {};  // guard with mutex_
    bool waitForJobs_ {false};                    // guard with mutex_
    std::atomic<int> jobCount_ {0};

    // Increment the count.  Only an atomic increment is performed, so no
    // lock is taken here.
    JobCounter& operator++() noexcept
    {
        ++jobCount_;
        return *this;
    }

    // Decrement the count.  If we're stopping and the count drops to zero
    // notify allJobsDoneCond_.
    JobCounter& operator--()
    {
        // Even though jobCount_ is atomic, we decrement its value under a
        // lock.  This removes a small timing window that occurs if the
        // waiting thread is handling a spurious wakeup when jobCount_
        // drops to zero.
        std::lock_guard<std::mutex> lock {mutex_};

        // Update jobCount_.  Notify if we're stopping and all jobs are done.
        if ((--jobCount_ == 0) && waitForJobs_)
            allJobsDoneCond_.notify_all();
        return *this;
    }

    // A private template class that helps count the number of Jobs
    // in flight.  This allows Stoppables to hold off declaring stopped()
    // until all their JobQueue Jobs are dispatched.
    //
    // Every constructor (including copy and move) increments the owning
    // JobCounter and the destructor decrements it, so the count equals the
    // number of CountedJob objects currently alive.  Note that a moved-from
    // CountedJob still contributes to the count until it is destroyed.
    template <typename JobHandler>
    class CountedJob
    {
    private:
        JobCounter& counter_;
        JobHandler handler_;

        static_assert (
            std::is_same<decltype(
                handler_(std::declval<Job&>())), void>::value,
            "JobHandler must be callable with Job&");

    public:
        CountedJob() = delete;

        // Copying creates one more live CountedJob, so count it.
        CountedJob (CountedJob const& rhs)
        : counter_ (rhs.counter_)
        , handler_ (rhs.handler_)
        {
            ++counter_;
        }

        // Moving also creates one more live object (rhs is still destroyed
        // later), so count it.  The move is non-throwing whenever moving
        // the wrapped handler is non-throwing; the counter increment itself
        // cannot throw.
        CountedJob (CountedJob&& rhs)
            noexcept (std::is_nothrow_move_constructible<JobHandler>::value)
        : counter_ (rhs.counter_)
        , handler_ (std::move (rhs.handler_))
        {
            ++counter_;
        }

        // Takes ownership of handler and registers this object with counter.
        CountedJob (JobCounter& counter, JobHandler&& handler)
        : counter_ (counter)
        , handler_ (std::move (handler))
        {
            ++counter_;
        }

        // Assignment is disabled; counter_ is a reference and cannot be
        // reseated.
        CountedJob& operator=(CountedJob const& rhs) = delete;
        CountedJob& operator=(CountedJob&& rhs) = delete;

        // Acquires the JobCounter's mutex_ (via operator--) and may wake a
        // thread blocked in join().
        ~CountedJob()
        {
            --counter_;
        }

        // Invoke the wrapped handler.
        void operator ()(Job& job)
        {
            return handler_ (job);
        }
    };

public:
    JobCounter() = default;

    // Not copyable or movable.  Outstanding counts would be hard to sort out.
    JobCounter (JobCounter const&) = delete;
    JobCounter& operator=(JobCounter const&) = delete;

    /** Destructor blocks, by calling join(), until every counted Job
        has been destroyed.
    */
    ~JobCounter()
    {
        join();
    }

    /** Returns once all counted in-flight Jobs are destroyed.

        Also marks this JobCounter as joined, after which wrap() returns
        boost::none.  May be called more than once.
    */
    void join()
    {
        std::unique_lock<std::mutex> lock {mutex_};
        waitForJobs_ = true;

        // The predicate is evaluated before the first wait, so this
        // returns immediately when no counted Jobs are outstanding.
        allJobsDoneCond_.wait (lock, [this] { return jobCount_ == 0; });
    }

    /** Wrap the passed lambda with a reference counter.

        @param handler Lambda that accepts a Job& parameter and returns void.
        @return If join() has been called returns boost::none.  Otherwise
                returns a boost::optional that wraps handler with a
                reference counter.
    */
    template <class JobHandler>
    boost::optional<CountedJob<JobHandler>>
    wrap (JobHandler&& handler)
    {
        // The current intention is that wrap() may only be called with an
        // rvalue lambda.  That can be adjusted in the future if needed,
        // but the following static_assert covers current expectations.
        static_assert (std::is_rvalue_reference<decltype (handler)>::value,
            "JobCounter::wrap() only supports rvalue lambdas.");

        // ret is deliberately declared before lock.  Locals are destroyed
        // in reverse order, so if the return value is not elided the lock
        // is released before ret (and any CountedJob it holds) is
        // destroyed; ~CountedJob acquires mutex_.
        boost::optional<CountedJob<JobHandler>> ret;

        // Checking waitForJobs_ and creating the CountedJob under the same
        // lock means a Job is either counted before join() starts waiting
        // or is refused.
        std::lock_guard<std::mutex> lock {mutex_};
        if (! waitForJobs_)
            ret.emplace (*this, std::move (handler));

        return ret;
    }

    /** Current number of Jobs outstanding.  Only useful for testing. */
    int count() const
    {
        return jobCount_;
    }

    /** Returns true if this has been joined.

        Even if true is returned, counted Jobs may still be in flight.
        However if (joined() && (count() == 0)) there should be no more
        counted Jobs in flight.
    */
    bool joined() const
    {
        std::lock_guard<std::mutex> lock {mutex_};
        return waitForJobs_;
    }
};
} // ripple
#endif // RIPPLE_CORE_JOB_COUNTER_H_INCLUDED

View File

@@ -22,6 +22,7 @@
#include <ripple/basics/LocalValue.h>
#include <ripple/basics/win32_workaround.h>
#include <ripple/core/JobCounter.h>
#include <ripple/core/JobTypes.h>
#include <ripple/core/JobTypeData.h>
#include <ripple/core/Stoppable.h>
@@ -110,8 +111,35 @@ public:
Stoppable& parent, beast::Journal journal, Logs& logs);
~JobQueue ();
/** Adds a job to the JobQueue.
@param type The type of job.
@param name Name of the job.
@param func std::function with signature void (Job&). Called when the job is executed.
*/
void addJob (JobType type, std::string const& name, JobFunction const& func);
/** Adds a counted job to the JobQueue.
@param type The type of job.
@param name Name of the job.
@param counter JobCounter for counting the Job.
@param jobHandler Lambda with signature void (Job&). Called when the job is executed.
@return true if JobHandler added, false if JobCounter is already joined.
*/
template <typename JobHandler>
bool addCountedJob (JobType type,
    std::string const& name, JobCounter& counter, JobHandler&& jobHandler)
{
    // jobHandler is a forwarding reference, so forward it instead of
    // unconditionally moving it.  An unconditional std::move would turn an
    // lvalue argument into an rvalue, slipping past the rvalue-only
    // static_assert in JobCounter::wrap() and silently moving from the
    // caller's object.  With std::forward an lvalue argument is rejected
    // at compile time by that static_assert; rvalue arguments behave
    // exactly as before.
    if (auto optionalCountedJob =
            counter.wrap (std::forward<JobHandler> (jobHandler)))
    {
        // The counter accepted the handler: queue the counted wrapper.
        addJob (type, name, std::move (*optionalCountedJob));
        return true;
    }

    // counter has already been joined; nothing was queued.
    return false;
}
/** Creates a coroutine and adds a job to the queue which will run it.
@param t The type of job.

View File

@@ -0,0 +1,122 @@
//------------------------------------------------------------------------------
/*
This file is part of rippled: https://github.com/ripple/rippled
Copyright (c) 2017 Ripple Labs Inc.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <BeastConfig.h>
#include <ripple/core/JobCounter.h>
#include <ripple/beast/unit_test.h>
#include <atomic>
#include <chrono>
#include <thread>
namespace ripple {
//------------------------------------------------------------------------------
// Unit tests for JobCounter: reference counting through wrap() and the
// blocking behavior of join().
class JobCounter_test : public beast::unit_test::suite
{
    // Checks that every live wrapped Job (original, copy, or move target)
    // is counted, that the count drops as each goes out of scope, and that
    // wrap() refuses to produce Jobs once join() has been called.
    void testWrap()
    {
        // Verify reference counting.
        JobCounter jobCounter;
        BEAST_EXPECT (jobCounter.count() == 0);
        {
            auto wrapped1 = jobCounter.wrap ([] (Job&) {});
            BEAST_EXPECT (jobCounter.count() == 1);

            // wrapped1 should be callable with a Job.
            {
                Job j;
                (*wrapped1)(j);
            }
            {
                // Copy should increase reference count.
                auto wrapped2 (wrapped1);
                BEAST_EXPECT (jobCounter.count() == 2);
                {
                    // Move should increase reference count.
                    auto wrapped3 (std::move(wrapped2));
                    BEAST_EXPECT (jobCounter.count() == 3);
                    {
                        // An additional Job also increases count.
                        auto wrapped4 = jobCounter.wrap ([] (Job&) {});
                        BEAST_EXPECT (jobCounter.count() == 4);
                    }
                    BEAST_EXPECT (jobCounter.count() == 3);
                }
                BEAST_EXPECT (jobCounter.count() == 2);
            }
            BEAST_EXPECT (jobCounter.count() == 1);
        }
        BEAST_EXPECT (jobCounter.count() == 0);

        // Join with 0 count should not stall.
        jobCounter.join();

        // Wrapping a Job after join() should return boost::none.
        BEAST_EXPECT (jobCounter.wrap ([] (Job&) {}) == boost::none);
    }

    // Checks that join() blocks while a counted Job is outstanding and
    // returns once that Job has been destroyed.
    void testWaitOnJoin()
    {
        // Start with a single outstanding counted Job.
        JobCounter jobCounter;
        BEAST_EXPECT (jobCounter.count() == 0);

        auto job = (jobCounter.wrap ([] (Job&) {}));
        BEAST_EXPECT (jobCounter.count() == 1);

        // Calling join() now should stall, so do it on a different thread.
        std::atomic<bool> threadExited {false};
        std::thread localThread ([&jobCounter, &threadExited] ()
        {
            // Should stall after calling join.
            jobCounter.join();
            threadExited.store (true);
        });

        // Wait for the thread to call jobCounter.join().  joined() takes
        // the same mutex that join() needs, so yield between polls rather
        // than spinning hot on it.
        while (! jobCounter.joined())
            std::this_thread::yield();

        // The thread should still be active after waiting a millisecond.
        // This is not a guarantee that join() stalled the thread, but it
        // improves confidence.
        using namespace std::chrono_literals;
        std::this_thread::sleep_for (1ms);
        BEAST_EXPECT (threadExited == false);

        // Destroy the Job and expect the thread to exit (asynchronously).
        job = boost::none;
        BEAST_EXPECT (jobCounter.count() == 0);

        // Wait for the thread to exit, yielding so it can make progress.
        while (threadExited == false)
            std::this_thread::yield();
        localThread.join();
    }

public:
    void run()
    {
        testWrap();
        testWaitOnJoin();
    }
};
BEAST_DEFINE_TESTSUITE(JobCounter, core, ripple);
}

View File

@@ -21,6 +21,7 @@
#include <test/core/Config_test.cpp>
#include <test/core/Coroutine_test.cpp>
#include <test/core/DeadlineTimer_test.cpp>
#include <test/core/JobCounter_test.cpp>
#include <test/core/SociDB_test.cpp>
#include <test/core/Stoppable_test.cpp>
#include <test/core/TerminateHandler_test.cpp>