From fdd1f2ec367539340f79e5ff4eb9b705d936e20b Mon Sep 17 00:00:00 2001 From: Scott Schurr Date: Wed, 25 May 2016 19:13:34 -0700 Subject: [PATCH] Log uncaught exceptions at the top of threads (RIPD-1166) --- Builds/VisualStudio2015/RippleD.vcxproj | 8 + .../VisualStudio2015/RippleD.vcxproj.filters | 6 + src/ripple/app/ledger/impl/LedgerCleaner.cpp | 7 + src/ripple/app/main/LoadManager.cpp | 6 + src/ripple/app/main/LoadManager.h | 1 + src/ripple/app/misc/SHAMapStoreImp.cpp | 8 + src/ripple/app/misc/SHAMapStoreImp.h | 1 + src/ripple/basics/Log.h | 8 +- src/ripple/basics/impl/Log.cpp | 11 +- src/ripple/core/Job.h | 3 + src/ripple/core/ReportUncaughtException.h | 153 ++++++++++++++++ src/ripple/core/impl/DeadlineTimer.cpp | 9 +- src/ripple/core/impl/Job.cpp | 30 +++- src/ripple/core/impl/SNTPClock.cpp | 12 +- src/ripple/core/impl/Workers.cpp | 11 ++ src/ripple/core/impl/Workers.h | 5 +- .../tests/ReportUncaughtException.test.cpp | 169 ++++++++++++++++++ src/ripple/nodestore/impl/DatabaseImp.h | 10 +- src/ripple/resource/impl/ResourceManager.cpp | 7 + src/ripple/unity/core.cpp | 1 + src/ripple/websocket/Server.h | 7 + 21 files changed, 452 insertions(+), 21 deletions(-) create mode 100644 src/ripple/core/ReportUncaughtException.h create mode 100644 src/ripple/core/tests/ReportUncaughtException.test.cpp diff --git a/Builds/VisualStudio2015/RippleD.vcxproj b/Builds/VisualStudio2015/RippleD.vcxproj index 2a5e8e6a9..90267b318 100644 --- a/Builds/VisualStudio2015/RippleD.vcxproj +++ b/Builds/VisualStudio2015/RippleD.vcxproj @@ -2047,6 +2047,8 @@ + + @@ -2069,6 +2071,12 @@ ..\..\src\soci\src\core;..\..\src\sqlite;%(AdditionalIncludeDirectories) ..\..\src\soci\src\core;..\..\src\sqlite;%(AdditionalIncludeDirectories) + + True + True + ..\..\src\soci\src\core;..\..\src\sqlite;%(AdditionalIncludeDirectories) + ..\..\src\soci\src\core;..\..\src\sqlite;%(AdditionalIncludeDirectories) + True True diff --git a/Builds/VisualStudio2015/RippleD.vcxproj.filters b/Builds/VisualStudio2015/RippleD.vcxproj.filters index 8da166a6b..2949e6b0e 100644 --- a/Builds/VisualStudio2015/RippleD.vcxproj.filters +++ b/Builds/VisualStudio2015/RippleD.vcxproj.filters @@ -2592,6 +2592,9 @@ ripple\core + + ripple\core + ripple\core @@ -2607,6 +2610,9 @@ ripple\core\tests + + ripple\core\tests + ripple\core\tests diff --git a/src/ripple/app/ledger/impl/LedgerCleaner.cpp b/src/ripple/app/ledger/impl/LedgerCleaner.cpp index 21c203a9e..4303a391b 100644 --- a/src/ripple/app/ledger/impl/LedgerCleaner.cpp +++ b/src/ripple/app/ledger/impl/LedgerCleaner.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -241,6 +242,12 @@ private: } void run () + { + reportUncaughtException ( + this, &LedgerCleanerImp::runImpl, "LedgerCleanerImp::run()"); + } + + void runImpl () { beast::Thread::setCurrentThreadName ("LedgerCleaner"); JLOG (j_.debug()) << "Started"; diff --git a/src/ripple/app/main/LoadManager.cpp b/src/ripple/app/main/LoadManager.cpp index af652bbd0..3ae8e442b 100644 --- a/src/ripple/app/main/LoadManager.cpp +++ b/src/ripple/app/main/LoadManager.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +107,11 @@ void LoadManager::onStop () //------------------------------------------------------------------------------ void LoadManager::run () +{ + reportUncaughtException (this, &LoadManager::runImpl, "LoadManager::run()"); +} + +void LoadManager::runImpl () { beast::Thread::setCurrentThreadName ("LoadManager"); diff --git a/src/ripple/app/main/LoadManager.h b/src/ripple/app/main/LoadManager.h index d5d57404b..ab6c8b53c 100644 --- a/src/ripple/app/main/LoadManager.h +++ b/src/ripple/app/main/LoadManager.h @@ -97,6 +97,7 @@ public: private: void run (); + void runImpl (); private: Application& app_; diff --git a/src/ripple/app/misc/SHAMapStoreImp.cpp b/src/ripple/app/misc/SHAMapStoreImp.cpp index 64c1e4373..80f039b20 100644 --- a/src/ripple/app/misc/SHAMapStoreImp.cpp +++ b/src/ripple/app/misc/SHAMapStoreImp.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -296,6 +297,13 @@ SHAMapStoreImp::copyNode (std::uint64_t& nodeCount, void SHAMapStoreImp::run() +{ + reportUncaughtException ( + this, &SHAMapStoreImp::runImpl, "SHAMapStoreImp::run()"); +} + +void +SHAMapStoreImp::runImpl() { LedgerIndex lastRotated = state_db_.getState().lastRotated; netOPs_ = &app_.getOPs(); diff --git a/src/ripple/app/misc/SHAMapStoreImp.h b/src/ripple/app/misc/SHAMapStoreImp.h index f4cb75a02..d05b9eae0 100644 --- a/src/ripple/app/misc/SHAMapStoreImp.h +++ b/src/ripple/app/misc/SHAMapStoreImp.h @@ -175,6 +175,7 @@ private: // callback for visitNodes bool copyNode (std::uint64_t& nodeCount, SHAMapAbstractNode const &node); void run(); + void runImpl(); void dbPaths(); std::shared_ptr makeBackendRotating ( std::string path = std::string()); diff --git a/src/ripple/basics/Log.h b/src/ripple/basics/Log.h index 683715f50..19dc13dda 100644 --- a/src/ripple/basics/Log.h +++ b/src/ripple/basics/Log.h @@ -249,8 +249,12 @@ private: //------------------------------------------------------------------------------ // Debug logging: -/** Set the sink for the debug journal. */ -void +/** Set the sink for the debug journal. + + @param sink unique_ptr to new debug Sink. + @return unique_ptr to the previous Sink. nullptr if there was no Sink. +*/ +std::unique_ptr setDebugLogSink( std::unique_ptr sink); diff --git a/src/ripple/basics/impl/Log.cpp b/src/ripple/basics/impl/Log.cpp index 86e9a8652..44c74eb98 100644 --- a/src/ripple/basics/impl/Log.cpp +++ b/src/ripple/basics/impl/Log.cpp @@ -364,17 +364,20 @@ public: DebugSink(DebugSink&&) = delete; DebugSink& operator=(DebugSink&&) = delete; - void + std::unique_ptr set(std::unique_ptr sink) { std::lock_guard _(m_); - holder_ = std::move(sink); + using std::swap; + swap (holder_, sink); if (holder_) sink_ = *holder_; else sink_ = beast::Journal::getNullSink(); + + return sink; } beast::Journal::Sink& @@ -393,11 +396,11 @@ debugSink() return _; } -void +std::unique_ptr setDebugLogSink( std::unique_ptr sink) { - debugSink().set(std::move(sink)); + return debugSink().set(std::move(sink)); } beast::Journal::Stream diff --git a/src/ripple/core/Job.h b/src/ripple/core/Job.h index 294a64c98..1eb5c7f16 100644 --- a/src/ripple/core/Job.h +++ b/src/ripple/core/Job.h @@ -134,6 +134,9 @@ public: bool operator<= (const Job& j) const; bool operator>= (const Job& j) const; +private: + void doJobImpl(); + private: CancelCallback m_cancelCallback; JobType mType; diff --git a/src/ripple/core/ReportUncaughtException.h b/src/ripple/core/ReportUncaughtException.h new file mode 100644 index 000000000..7e01482ab --- /dev/null +++ b/src/ripple/core/ReportUncaughtException.h @@ -0,0 +1,153 @@ +//------------------------------------------------------------------------------ +/* + This file is part of rippled: https://github.com/ripple/rippled + Copyright (c) 2016, Ripple Labs Inc. + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#ifndef RIPPLE_CORE_REPORT_UNCAUGHT_EXCEPTION_H_INCLUDED +#define RIPPLE_CORE_REPORT_UNCAUGHT_EXCEPTION_H_INCLUDED + +#include +#include // forced_unwind exception +#include +#include +#include +#include +#include + +namespace ripple +{ +/** +Report uncaught exceptions to DebugLog and cerr + +Catch all exceptions that escape the called function. Report as much +information as can be extracted from the exception to both the DebugLog +and cerr. + +The idea is to use this routine at the top of a thread, since on +many platforms the stack trace for an uncaught exception on a thread +is almost useless. + +For those platforms where the stack trace from an uncaught exception is +useful (e.g., OS X) this routine is a no-op. That way a catch will not +interfere with the stack trace showing the real source of the uncaught +exception. + +Note that any extra information is passed using a lambda because we only +want to do the work of building the string in the unlikely event of an +uncaught exception. The lambda is only called in the error case. + +Usage example + +#include +#include +#include +#include + +class ThreadedHandler +{ +public: + void operator() () + { + reportUncaughtException ( + this, &ThreadedHandler::runImpl, "ThreadedHandler::operator()"); + } + + void runImpl() + { + // do stuff. + throw std::logic_error("logic_error: What was I thinking?"); + } +}; + +int main () +{ + using namespace std::chrono_literals; + + ThreadedHandler handler; + std::thread t {handler}; + std::this_thread::sleep_for (1s); + t.join(); + return 0; +} + +@param t Pointer to object to call. +@param threadTop Pointer to member function of t to call. +@param name Name of function to log. +@param lamdba Optional lambda that returns additional text for the log. +*/ +template +void reportUncaughtException ( + T* t, R (T::*threadTop) (), char const* name, L&& lambda) +{ + // Enforce that lambda takes no parameters and returns std::string. + static_assert ( + std::is_convertible::value, + "Last argument must be a lamdba taking no arguments " + "and returning std::string."); + +#ifdef __APPLE__ + // Don't use a try block so we can get a good call stack. + ((t)->*(threadTop)) (); +#else + // Local lambda for string formatting and re-throwing. + auto logUncaughtException = + [name, &lambda] (char const* exName) + { + std::stringstream ss; + ss << "Unhandled exception in " << name + << "; Exception: " << exName; + + std::string extra = lambda(); + if (! extra.empty()) + ss << "; " << extra; + + JLOG(debugLog()) << ss.str(); + std::cerr << ss.str() << std::endl; + throw; + }; + + try + { + // Call passed in member function. + ((t)->*(threadTop)) (); + } + catch (std::exception const& ex) + { + logUncaughtException (ex.what()); + } + catch (boost::coroutines::detail::forced_unwind const& ex) + { + logUncaughtException ("forced_unwind"); + } + catch (...) + { + logUncaughtException ("unknown exception type"); + } +#endif // __APPLE__ else +} + +// Handle the common case where there is no additional local information. +template +inline void reportUncaughtException ( + T* t, R (T::*threadTop) (), char const* name) +{ + reportUncaughtException (t, threadTop, name, []{ return std::string(); }); +} + +} // namespace ripple + +#endif diff --git a/src/ripple/core/impl/DeadlineTimer.cpp b/src/ripple/core/impl/DeadlineTimer.cpp index 1f96f27e3..6c1a85749 100644 --- a/src/ripple/core/impl/DeadlineTimer.cpp +++ b/src/ripple/core/impl/DeadlineTimer.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -97,7 +98,13 @@ public: } } - void run () + void run () override + { + reportUncaughtException ( + this, &Manager::runImpl, "DeadlineTimer::Manager::run()"); + } + + void runImpl () { while (! threadShouldExit ()) { diff --git a/src/ripple/core/impl/Job.cpp b/src/ripple/core/impl/Job.cpp index d9a72bf73..be62cbda0 100644 --- a/src/ripple/core/impl/Job.cpp +++ b/src/ripple/core/impl/Job.cpp @@ -19,6 +19,7 @@ #include #include +#include #include namespace ripple { @@ -76,14 +77,15 @@ bool Job::shouldCancel () const void Job::doJob () { - m_loadEvent->start (); - m_loadEvent->reName (mName); - - mJob (*this); - - // Destroy the lambda, otherwise we won't include - // its duration in the time measurement - mJob = std::function(); + reportUncaughtException (this, &Job::doJobImpl, "Job::doJob()", + [this] () + { + std::stringstream ss; + ss << "Job name: " << this->mName + << "; Job type: " << this->mType + << "; Job info: " << this->mJob.target_type().name(); + return ss.str(); + }); } void Job::rename (std::string const& newName) @@ -135,4 +137,16 @@ bool Job::operator<= (const Job& j) const return mJobIndex <= j.mJobIndex; } +void Job::doJobImpl () +{ + m_loadEvent->start (); + m_loadEvent->reName (mName); + + mJob (*this); + + // Destroy the lambda, otherwise we won't include + // its duration in the time measurement + mJob = std::function(); +} + } diff --git a/src/ripple/core/impl/SNTPClock.cpp b/src/ripple/core/impl/SNTPClock.cpp index 2772f58cc..3357dd41d 100644 --- a/src/ripple/core/impl/SNTPClock.cpp +++ b/src/ripple/core/impl/SNTPClock.cpp @@ -18,12 +18,13 @@ //============================================================================== #include +#include #include #include #include -#include -#include #include +#include +#include #include #include #include @@ -203,7 +204,12 @@ public: void doRun () { setCallingThreadName("SNTPClock"); - io_service_.run(); + + // Get the address of an overloaded asio method + using Pio_service_mem = std::size_t (boost::asio::io_service::*)(); + Pio_service_mem pRun = &boost::asio::io_service::run; + + reportUncaughtException (&io_service_, pRun, "SNTPClientImp::doRun()"); } void diff --git a/src/ripple/core/impl/Workers.cpp b/src/ripple/core/impl/Workers.cpp index fcc8b9231..f399e6f52 100644 --- a/src/ripple/core/impl/Workers.cpp +++ b/src/ripple/core/impl/Workers.cpp @@ -18,6 +18,7 @@ //============================================================================== #include +#include #include #include @@ -156,6 +157,16 @@ Workers::Worker::~Worker () } void Workers::Worker::run () +{ + // Call runImpl() and report if any exceptions escape runImpl. + reportUncaughtException (this, &Workers::Worker::runImpl, + "Workers::Worker::run()", [this] () + { + return "Thread: " + Thread::getThreadName(); + }); +} + +void Workers::Worker::runImpl () { while (! threadShouldExit ()) { diff --git a/src/ripple/core/impl/Workers.h b/src/ripple/core/impl/Workers.h index ab482f54a..f9af11483 100644 --- a/src/ripple/core/impl/Workers.h +++ b/src/ripple/core/impl/Workers.h @@ -114,7 +114,7 @@ private: Active: Running the task processing loop. Idle: Active, but blocked on waiting for a task. - Pausd: Blocked waiting to exit or become active. + Paused: Blocked waiting to exit or become active. */ class Worker : public beast::LockFreeStack ::Node @@ -127,7 +127,8 @@ private: ~Worker (); private: - void run (); + void run () override; + void runImpl (); private: Workers& m_workers; diff --git a/src/ripple/core/tests/ReportUncaughtException.test.cpp b/src/ripple/core/tests/ReportUncaughtException.test.cpp new file mode 100644 index 000000000..3ad85d17b --- /dev/null +++ b/src/ripple/core/tests/ReportUncaughtException.test.cpp @@ -0,0 +1,169 @@ +//------------------------------------------------------------------------------ +/* + This file is part of rippled: https://github.com/ripple/rippled + Copyright (c) 2016 Ripple Labs Inc. + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ +//============================================================================== + +#include + +#include +#include +#include + +namespace ripple { + +// reportUncaughtException is disabled if __APPLE__ is defined. +#ifndef __APPLE__ + +class ReportUncaughtException_test : public TestSuite +{ +public: + class TestSink : public beast::Journal::Sink + { + std::stringstream ss_; + + public: + TestSink() + : Sink (beast::severities::kFatal, false) + { + } + + std::string getText() const + { + return ss_.str(); + } + + void reset() + { + ss_.str(""); + } + + void + write (beast::severities::Severity level, std::string const& s) override + { + if (level >= threshold()) + ss_ << s; + } + }; + + class ExceptionGen + { + public: + // A place to keep methods that throw. + void dontThrow() + { + return; + } + + void throwStdExcept() + { + throw std::logic_error ("logic_error"); + } + + void throwForcedUnwind() + { + throw boost::coroutines::detail::forced_unwind (); + } + + void throwWeird() + { + throw std::string ("Pretty unusual..."); + } + }; + + void test () + { + // Install our own debug Sink so we can see what gets written. + // Retain the old Sink so we can put it back. + auto testSink = std::make_unique(); + TestSink& sinkRef = *testSink; + std::unique_ptr oldSink = + setDebugLogSink (std::move (testSink)); + + ExceptionGen exGen; + + // Make sure nothing gets logged if there's no exception. + reportUncaughtException (&exGen, &ExceptionGen::dontThrow, "noThrow"); + expect (sinkRef.getText() == ""); + sinkRef.reset(); + + reportUncaughtException (&exGen, &ExceptionGen::dontThrow, "noThrow", + [] { return "Just noise"; }); + + expect (sinkRef.getText() == ""); + sinkRef.reset(); + + using PExGenMemFn = void (ExceptionGen::*) (); + auto testCase = [this, &exGen, &sinkRef] ( + PExGenMemFn call, char const* description) + { + std::string want = std::string ("Unhandled exception in " + "testFn; Exception: ") + description; + + // Test the case without the closing lambda. + bool gotException = false; + try + { + reportUncaughtException (&exGen, call, "testFn"); + } + catch (...) + { + gotException = true; + } + expect (gotException == true); + expect (sinkRef.getText() == want); + sinkRef.reset(); + + // Try the case with the closing lambda. + gotException = false; + try + { + reportUncaughtException (&exGen, call, "testFn", + []{ return "extra info"; }); + } + catch (...) + { + gotException = true; + } + expect (gotException == true); + expect (sinkRef.getText() == want + "; extra info"); + sinkRef.reset(); + }; + + // Test logging for a stad::exception. + testCase (&ExceptionGen::throwStdExcept, "logic_error"); + + // Test logging for a forced_unwind. + testCase (&ExceptionGen::throwForcedUnwind, "forced_unwind"); + + // Test logging for none of the above. + testCase (&ExceptionGen::throwWeird, "unknown exception type"); + + // We're done with TestSink. Re-install the old Sink. + setDebugLogSink (std::move (oldSink)); + } + + void run () + { + test (); + } +}; + +BEAST_DEFINE_TESTSUITE (ReportUncaughtException, core, ripple); + +#endif // __APPLE__ + +} // ripple diff --git a/src/ripple/nodestore/impl/DatabaseImp.h b/src/ripple/nodestore/impl/DatabaseImp.h index 799ed8473..8ae8b48bb 100644 --- a/src/ripple/nodestore/impl/DatabaseImp.h +++ b/src/ripple/nodestore/impl/DatabaseImp.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -333,8 +334,15 @@ public: //------------------------------------------------------------------------------ - // Entry point for async read threads + // Uncaught exception handling for async read threads void threadEntry () + { + reportUncaughtException ( + this, &DatabaseImp::threadEntryImpl, "DatabaseImp::threadEntry()"); + } + + // Entry point for async read threads + void threadEntryImpl () { beast::Thread::setCurrentThreadName ("prefetch"); while (1) diff --git a/src/ripple/resource/impl/ResourceManager.cpp b/src/ripple/resource/impl/ResourceManager.cpp index 99e25dfd1..a2c00274e 100644 --- a/src/ripple/resource/impl/ResourceManager.cpp +++ b/src/ripple/resource/impl/ResourceManager.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -112,6 +113,12 @@ public: private: void run () + { + reportUncaughtException ( + this, &ManagerImp::runImpl, "Resource::Manager::run()"); + } + + void runImpl () { beast::Thread::setCurrentThreadName ("Resource::Manager"); for(;;) diff --git a/src/ripple/unity/core.cpp b/src/ripple/unity/core.cpp index 9c610c806..a3259f992 100644 --- a/src/ripple/unity/core.cpp +++ b/src/ripple/unity/core.cpp @@ -35,5 +35,6 @@ #include #include #include +#include #include diff --git a/src/ripple/websocket/Server.h b/src/ripple/websocket/Server.h index d4af5aa2a..63fb1b063 100644 --- a/src/ripple/websocket/Server.h +++ b/src/ripple/websocket/Server.h @@ -21,6 +21,7 @@ #define RIPPLE_WEBSOCKET_SERVER_H_INCLUDED #include +#include #include #include #include @@ -58,6 +59,12 @@ public: private: void run () + { + reportUncaughtException ( + this, &Server::runImpl, "Server::run()"); + } + + void runImpl () { beast::Thread::setCurrentThreadName ("WebSocket");