From 62a3f33d729dd50b91af81b4da81464d7aad2029 Mon Sep 17 00:00:00 2001 From: Nik Bougalis Date: Sat, 30 Mar 2019 14:00:00 -0500 Subject: [PATCH] Remove the built-in "sustain" watchdog: The built-in watchdog is simplistic and can, sometimes, cause problems especially on systems that have the ability to automatically start and monitor processes. This commit removes the sustain system entirely, changes the handling of the SIGTERM signal to properly terminate the process and improves the error message reported to the user when the command line used to start `rippled` is incorrect and malformed. --- Builds/CMake/RippledCore.cmake | 1 - Builds/containers/shared/rippled.service | 6 +- src/ripple/app/main/Application.cpp | 64 ++++----- src/ripple/app/main/Main.cpp | 32 ++--- src/ripple/basics/Sustain.h | 38 ------ src/ripple/basics/impl/Sustain.cpp | 165 ----------------------- 6 files changed, 38 insertions(+), 268 deletions(-) delete mode 100644 src/ripple/basics/Sustain.h delete mode 100644 src/ripple/basics/impl/Sustain.cpp diff --git a/Builds/CMake/RippledCore.cmake b/Builds/CMake/RippledCore.cmake index c0dc8e94e..5dffaade7 100644 --- a/Builds/CMake/RippledCore.cmake +++ b/Builds/CMake/RippledCore.cmake @@ -434,7 +434,6 @@ target_sources (rippled PRIVATE src/ripple/basics/impl/BasicConfig.cpp src/ripple/basics/impl/PerfLogImp.cpp src/ripple/basics/impl/ResolverAsio.cpp - src/ripple/basics/impl/Sustain.cpp src/ripple/basics/impl/UptimeClock.cpp src/ripple/basics/impl/make_SSLContext.cpp src/ripple/basics/impl/mulDiv.cpp diff --git a/Builds/containers/shared/rippled.service b/Builds/containers/shared/rippled.service index 4514edb0d..24d9dd975 100644 --- a/Builds/containers/shared/rippled.service +++ b/Builds/containers/shared/rippled.service @@ -1,12 +1,12 @@ [Unit] Description=Ripple Daemon +After=network-online.target +Wants=network-online.target [Service] Type=simple ExecStart=/opt/ripple/bin/rippled --net --silent --conf /etc/opt/ripple/rippled.cfg -# Default KillSignal can be used if/when rippled handles SIGTERM -KillSignal=SIGINT -Restart=no +Restart=on-failure User=rippled Group=rippled LimitNOFILE=65536 diff --git a/src/ripple/app/main/Application.cpp b/src/ripple/app/main/Application.cpp index 3b8a4ad90..ae42437fa 100644 --- a/src/ripple/app/main/Application.cpp +++ b/src/ripple/app/main/Application.cpp @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -73,10 +72,8 @@ #include #include -#include #include #include -#include namespace ripple { @@ -1081,26 +1078,6 @@ public: return true; } - void - signalled(const boost::system::error_code& ec, int signal_number) - { - if (ec == boost::asio::error::operation_aborted) - { - // Indicates the signal handler has been aborted - // do nothing - } - else if (ec) - { - JLOG(m_journal.error()) << "Received signal: " << signal_number - << " with error: " << ec.message(); - } - else - { - JLOG(m_journal.debug()) << "Received signal: " << signal_number; - signalStop(); - } - } - //-------------------------------------------------------------------------- // // Stoppable @@ -1392,21 +1369,31 @@ private: //------------------------------------------------------------------------------ -// VFALCO TODO Break this function up into many small initialization segments. -// Or better yet refactor these initializations into RAII classes -// which are members of the Application object. -// +// TODO Break this up into smaller, more digestible initialization segments. bool ApplicationImp::setup() { - // We want to intercept and wait for CTRL-C to terminate the process + // We want to intercept CTRL-C and the standard termination signal SIGTERM + // and terminate the process. This handler will NEVER be invoked twice. + // + // Note that async_wait is "one-shot": for each call, the handler will be + // invoked exactly once, either when one of the registered signals in the + // signal set occurs or the signal set is cancelled. Subsequent signals are + // effectively ignored (technically, they are queued up, waiting for a call + // to async_wait). m_signals.add(SIGINT); + m_signals.add(SIGTERM); + m_signals.async_wait( + [this](boost::system::error_code const& ec, int signum) { + // Indicates the signal handler has been aborted; do nothing + if (ec == boost::asio::error::operation_aborted) + return; - m_signals.async_wait(std::bind( - &ApplicationImp::signalled, - this, - std::placeholders::_1, - std::placeholders::_2)); + JLOG(m_journal.info()) << "Received signal " << signum; + + if (signum == SIGTERM || signum == SIGINT) + signalStop(); + }); assert(mTxnDB == nullptr); @@ -1805,17 +1792,20 @@ ApplicationImp::run() JLOG(m_journal.info()) << "Received shutdown request"; stop(m_journal); JLOG(m_journal.info()) << "Done."; - StopSustain(); } void ApplicationImp::signalStop() { // Unblock the main thread (which is sitting in run()). - // + // When we get C++20 this can use std::latch. std::lock_guard lk{mut_}; - isTimeToStop = true; - cv_.notify_all(); + + if (!isTimeToStop) + { + isTimeToStop = true; + cv_.notify_all(); + } } bool diff --git a/src/ripple/app/main/Main.cpp b/src/ripple/app/main/Main.cpp index bd28d788d..527f86f19 100644 --- a/src/ripple/app/main/Main.cpp +++ b/src/ripple/app/main/Main.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -32,7 +31,6 @@ #include #include #include -#include #include #include @@ -308,7 +306,8 @@ run(int argc, char** argv) { using namespace std; - beast::setCurrentThreadName("rippled: main"); + beast::setCurrentThreadName( + "rippled: main " + BuildInfo::getVersionString()); po::variables_map vm; @@ -334,7 +333,7 @@ run(int argc, char** argv) gen.add_options()( "conf", po::value(), "Specify the configuration file.")( "debug", "Enable normally suppressed debug logging")( - "fg", "Run in the foreground.")("help,h", "Display this message.")( + "help,h", "Display this message.")( "quorum", po::value(), "Override the minimum validation quorum.")( @@ -416,7 +415,8 @@ run(int argc, char** argv) "purpose, " "so this option is not needed for users")( "unittest-child", - "For internal use only when spawning child unit test processes."); + "For internal use only when spawning child unit test processes.")( + "fg", "Deprecated: server always in foreground mode."); // Interpret positional arguments as --parameters. po::positional_options_description p; @@ -439,10 +439,10 @@ run(int argc, char** argv) vm); po::notify(vm); // Invoke option notify functions. } - catch (std::exception const&) + catch (std::exception const& ex) { - std::cerr << "rippled: Incorrect command line syntax." << std::endl; - std::cerr << "Use '--help' for a list of options." << std::endl; + std::cerr << "rippled: " << ex.what() << std::endl; + std::cerr << "Try 'rippled --help' for a list of options." << std::endl; return 1; } @@ -708,18 +708,8 @@ run(int argc, char** argv) if (!adjustDescriptorLimit(1024, logs->journal("Application"))) return -1; - if (HaveSustain() && !vm.count("fg") && !config->standalone()) - { - auto const ret = DoSustain(); - - if (!ret.empty()) - std::cerr << "Watchdog: " << ret << std::endl; - } - if (vm.count("debug")) - { setDebugLogSink(logs->makeSink("Debug", beast::severities::kTrace)); - } auto timeKeeper = make_TimeKeeper(logs->journal("TimeKeeper")); @@ -727,19 +717,13 @@ run(int argc, char** argv) std::move(config), std::move(logs), std::move(timeKeeper)); if (!app->setup()) - { - StopSustain(); return -1; - } // With our configuration parsed, ensure we have // enough file descriptors available: if (!adjustDescriptorLimit( app->fdRequired(), app->logs().journal("Application"))) - { - StopSustain(); return -1; - } // Start the server app->doStart(true /*start timers*/); diff --git a/src/ripple/basics/Sustain.h b/src/ripple/basics/Sustain.h deleted file mode 100644 index c8032831f..000000000 --- a/src/ripple/basics/Sustain.h +++ /dev/null @@ -1,38 +0,0 @@ -//------------------------------------------------------------------------------ -/* - This file is part of rippled: https://github.com/ripple/rippled - Copyright (c) 2012, 2013 Ripple Labs Inc. - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ -//============================================================================== - -#ifndef RIPPLE_BASICS_SUSTAIN_H_INCLUDED -#define RIPPLE_BASICS_SUSTAIN_H_INCLUDED - -#include - -namespace ripple { - -// "Sustain" is a system for a buddy process that monitors the main process -// and relaunches it on a fault. -bool -HaveSustain(); -std::string -StopSustain(); -std::string -DoSustain(); - -} // namespace ripple - -#endif diff --git a/src/ripple/basics/impl/Sustain.cpp b/src/ripple/basics/impl/Sustain.cpp deleted file mode 100644 index 88c2492d9..000000000 --- a/src/ripple/basics/impl/Sustain.cpp +++ /dev/null @@ -1,165 +0,0 @@ -//------------------------------------------------------------------------------ -/* - This file is part of rippled: https://github.com/ripple/rippled - Copyright (c) 2012, 2013 Ripple Labs Inc. - - Permission to use, copy, modify, and/or distribute this software for any - purpose with or without fee is hereby granted, provided that the above - copyright notice and this permission notice appear in all copies. - - THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -*/ -//============================================================================== - -#include -#include -#include -#include - -// For Sustain Linux variants -// VFALCO TODO Rewrite Sustain to use beast::Process -#ifdef __linux__ -#include -#include -#include -#include -#endif -#ifdef __FreeBSD__ -#include -#include -#endif - -namespace ripple { - -#ifdef __unix__ - -static auto const sleepBeforeWaiting = 10; -static auto const sleepBetweenWaits = 1; - -static pid_t pManager = safe_cast(0); -static pid_t pChild = safe_cast(0); - -static void -pass_signal(int a) -{ - kill(pChild, a); -} - -static void -stop_manager(int) -{ - kill(pChild, SIGINT); - _exit(0); -} - -bool -HaveSustain() -{ - return true; -} - -std::string -StopSustain() -{ - if (getppid() != pManager) - return ""; - - kill(pManager, SIGHUP); - return "Terminating monitor"; -} - -static bool -checkChild(pid_t pid, int options) -{ - int i; - - if (waitpid(pChild, &i, options) == -1) - return false; - - return kill(pChild, 0) == 0; -} - -std::string -DoSustain() -{ - pManager = getpid(); - signal(SIGINT, stop_manager); - signal(SIGHUP, stop_manager); - signal(SIGUSR1, pass_signal); - signal(SIGUSR2, pass_signal); - - // Number of times the child has exited in less than - // 15 seconds. - int fastExit = 0; - - for (auto childCount = 1;; ++childCount) - { - pChild = fork(); - - if (pChild == -1) - _exit(0); - - auto cc = std::to_string(childCount); - if (pChild == 0) - { - beast::setCurrentThreadName("rippled: main"); - signal(SIGINT, SIG_DFL); - signal(SIGHUP, SIG_DFL); - signal(SIGUSR1, SIG_DFL); - signal(SIGUSR2, SIG_DFL); - return "Launching child " + cc; - } - - beast::setCurrentThreadName(("rippled: #" + cc).c_str()); - - sleep(sleepBeforeWaiting); - - // If the child has already terminated count this - // as a fast exit and an indication that something - // went wrong: - if (!checkChild(pChild, WNOHANG)) - { - if (++fastExit == 5) - _exit(0); - } - else - { - fastExit = 0; - - while (checkChild(pChild, 0)) - sleep(sleepBetweenWaits); - - (void)rename("core", ("core." + std::to_string(pChild)).c_str()); - } - } -} - -#else - -bool -HaveSustain() -{ - return false; -} - -std::string -DoSustain() -{ - return ""; -} - -std::string -StopSustain() -{ - return ""; -} - -#endif - -} // namespace ripple